SandboxFilter.cpp (83852B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this file, 5 * You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "SandboxFilter.h" 8 9 #include <asm/ioctls.h> // For TCGETS2 10 #include <asm/termbits.h> // For termios2 11 #include <errno.h> 12 #include <fcntl.h> 13 #include <linux/ioctl.h> 14 #include <linux/ipc.h> 15 #include <linux/memfd.h> 16 #include <linux/mman.h> 17 #include <linux/net.h> 18 #include <linux/sched.h> 19 #include <linux/sockios.h> 20 #include <string.h> 21 #include <sys/ioctl.h> 22 #include <sys/mman.h> 23 #include <sys/prctl.h> 24 #include <sys/socket.h> 25 #include <sys/syscall.h> 26 #include <sys/un.h> 27 #include <sys/utsname.h> 28 #include <time.h> 29 #include <unistd.h> 30 // This has to go after <sys/socket.h> for annoying reasons 31 #include <linux/wireless.h> 32 33 #include <algorithm> 34 #include <utility> 35 36 #include "PlatformMacros.h" 37 #include "Sandbox.h" // for ContentProcessSandboxParams 38 #include "SandboxBrokerClient.h" 39 #include "SandboxFilterUtil.h" 40 #include "SandboxInfo.h" 41 #include "SandboxInternal.h" 42 #include "SandboxLogging.h" 43 #include "SandboxOpenedFiles.h" 44 #include "mozilla/PodOperations.h" 45 #include "mozilla/ProcInfo_linux.h" 46 #include "mozilla/UniquePtr.h" 47 #include "prenv.h" 48 #include "sandbox/linux/bpf_dsl/bpf_dsl.h" 49 #include "sandbox/linux/system_headers/linux_seccomp.h" 50 #include "sandbox/linux/system_headers/linux_syscalls.h" 51 52 #if defined(GP_PLAT_amd64_linux) && defined(GP_ARCH_amd64) && \ 53 defined(MOZ_USING_WASM_SANDBOXING) 54 # include <asm/prctl.h> // For ARCH_SET_GS 55 #endif 56 57 using namespace sandbox::bpf_dsl; 58 59 // Fill in defines in case of old headers. 60 // (Warning: these are wrong on PA-RISC.) 
61 #ifndef MADV_HUGEPAGE 62 # define MADV_HUGEPAGE 14 63 #endif 64 #ifndef MADV_NOHUGEPAGE 65 # define MADV_NOHUGEPAGE 15 66 #endif 67 #ifndef MADV_DONTDUMP 68 # define MADV_DONTDUMP 16 69 #endif 70 71 // Added in Linux 4.5; see bug 1303813. 72 #ifndef MADV_FREE 73 # define MADV_FREE 8 74 #endif 75 76 #ifndef PR_SET_PTRACER 77 # define PR_SET_PTRACER 0x59616d61 78 #endif 79 80 // Linux 5.17+ 81 #ifndef PR_SET_VMA 82 # define PR_SET_VMA 0x53564d41 83 #endif 84 #ifndef PR_SET_VMA_ANON_NAME 85 # define PR_SET_VMA_ANON_NAME 0 86 #endif 87 88 // The GNU libc headers define O_LARGEFILE as 0 on x86_64, but we need the 89 // actual value because it shows up in file flags. 90 #if !defined(O_LARGEFILE) || O_LARGEFILE == 0 91 # define O_LARGEFILE_REAL 00100000 92 #else 93 # define O_LARGEFILE_REAL O_LARGEFILE 94 #endif 95 96 // Not part of UAPI, but userspace sees it in F_GETFL; see bug 1650751. 97 #define FMODE_NONOTIFY 0x4000000 98 99 #ifndef F_LINUX_SPECIFIC_BASE 100 # define F_LINUX_SPECIFIC_BASE 1024 101 #else 102 static_assert(F_LINUX_SPECIFIC_BASE == 1024); 103 #endif 104 105 #ifndef F_ADD_SEALS 106 # define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) 107 # define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) 108 #else 109 static_assert(F_ADD_SEALS == (F_LINUX_SPECIFIC_BASE + 9)); 110 static_assert(F_GET_SEALS == (F_LINUX_SPECIFIC_BASE + 10)); 111 #endif 112 113 // Added in 6.13 114 #ifndef MADV_GUARD_INSTALL 115 # define MADV_GUARD_INSTALL 102 116 # define MADV_GUARD_REMOVE 103 117 #else 118 static_assert(MADV_GUARD_INSTALL == 102); 119 static_assert(MADV_GUARD_REMOVE == 103); 120 #endif 121 122 // Added in 4.14 123 #ifndef MFD_HUGETLB 124 # define MFD_HUGETLB 4U 125 # define MFD_HUGE_MASK MAP_HUGE_MASK 126 # define MFD_HUGE_SHIFT MAP_HUGE_SHIFT 127 #else 128 static_assert(MFD_HUGE_MASK == MAP_HUGE_MASK); 129 static_assert(MFD_HUGE_SHIFT == MAP_HUGE_SHIFT); 130 #endif 131 132 // Added in 6.10 133 #ifndef F_DUPFD_QUERY 134 # define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3) 135 
#else 136 static_assert(F_DUPFD_QUERY == (F_LINUX_SPECIFIC_BASE + 3)); 137 #endif 138 139 // To avoid visual confusion between "ifdef ANDROID" and "ifndef ANDROID": 140 #ifndef ANDROID 141 # define DESKTOP 142 #endif 143 144 namespace { 145 static const unsigned long kIoctlTypeMask = _IOC_TYPEMASK << _IOC_TYPESHIFT; 146 static const unsigned long kTtyIoctls = TIOCSTI & kIoctlTypeMask; 147 // On some older architectures (but not x86 or ARM), ioctls are 148 // assigned type fields differently, and the TIOC/TC/FIO group 149 // isn't all the same type. If/when we support those archs, 150 // this would need to be revised (but really this should be a 151 // default-deny policy; see below). 152 static_assert(kTtyIoctls == (TCSETA & kIoctlTypeMask) && 153 kTtyIoctls == (FIOASYNC & kIoctlTypeMask), 154 "tty-related ioctls use the same type"); 155 }; // namespace 156 157 // This file defines the seccomp-bpf system call filter policies. 158 // See also SandboxFilterUtil.h, for the CASES_FOR_* macros and 159 // SandboxFilterBase::Evaluate{Socket,Ipc}Call. 160 // 161 // One important difference from how Chromium bpf_dsl filters are 162 // normally interpreted: returning -ENOSYS from a Trap() handler 163 // indicates an unexpected system call; SigSysHandler() in Sandbox.cpp 164 // will detect this, request a crash dump, and terminate the process. 165 // This does not apply to using Error(ENOSYS) in the policy, so that 166 // can be used if returning an actual ENOSYS is needed. 167 168 namespace mozilla { 169 170 // This class allows everything used by the sandbox itself, by the 171 // core IPC code, by the crash reporter, or other core code. It also 172 // contains support for brokering file operations, but file access is 173 // denied if no broker client is provided by the concrete class. 174 class SandboxPolicyCommon : public SandboxPolicyBase { 175 protected: 176 // Subclasses can assign these in their constructors to loosen the 177 // default settings. 
178 SandboxBrokerClient* mBroker = nullptr; 179 bool mMayCreateShmem = false; 180 bool mAllowUnsafeSocketPair = false; 181 bool mBrokeredConnect = false; // Can connect() be brokered? 182 183 SandboxPolicyCommon() = default; 184 185 typedef const arch_seccomp_data& ArgsRef; 186 187 static intptr_t BlockedSyscallTrap(ArgsRef aArgs, void* aux) { 188 MOZ_ASSERT(!aux); 189 return -ENOSYS; 190 } 191 192 // Convert Unix-style "return -1 and set errno" APIs back into the 193 // Linux ABI "return -err" style. 194 static intptr_t ConvertError(long rv) { return rv < 0 ? -errno : rv; } 195 196 template <typename... Args> 197 static intptr_t DoSyscall(long nr, Args... args) { 198 static_assert(std::conjunction_v< 199 std::conditional_t<(sizeof(Args) <= sizeof(void*)), 200 std::true_type, std::false_type>...>, 201 "each syscall arg is at most one word"); 202 return ConvertError(syscall(nr, args...)); 203 } 204 205 // Mesa's amdgpu driver uses kcmp with KCMP_FILE; see also bug 206 // 1624743. This policy restricts it to the process's own pid, 207 // which should be sufficient on its own if we need to remove the 208 // `type` restriction in the future. 209 // 210 // (Note: if we end up with more Mesa-specific hooks needed in 211 // several process types, we could put them into this class's 212 // EvaluateSyscall guarded by a boolean member variable, or 213 // introduce another layer of subclassing.) 214 ResultExpr KcmpPolicyForMesa() const { 215 // The real KCMP_FILE is part of an anonymous enum in 216 // <linux/kcmp.h>, but we can't depend on having that header, 217 // and it's not a #define so the usual #ifndef approach 218 // doesn't work. 
219 static const int kKcmpFile = 0; 220 const pid_t myPid = getpid(); 221 Arg<pid_t> pid1(0), pid2(1); 222 Arg<int> type(2); 223 return If(AllOf(pid1 == myPid, pid2 == myPid, type == kKcmpFile), Allow()) 224 .Else(InvalidSyscall()); 225 } 226 227 static intptr_t SchedTrap(ArgsRef aArgs, void* aux) { 228 const pid_t tid = syscall(__NR_gettid); 229 if (aArgs.args[0] == static_cast<uint64_t>(tid)) { 230 return DoSyscall(aArgs.nr, 0, static_cast<uintptr_t>(aArgs.args[1]), 231 static_cast<uintptr_t>(aArgs.args[2]), 232 static_cast<uintptr_t>(aArgs.args[3]), 233 static_cast<uintptr_t>(aArgs.args[4]), 234 static_cast<uintptr_t>(aArgs.args[5])); 235 } 236 return -EPERM; 237 } 238 239 private: 240 // Bug 1093893: Translate tkill to tgkill for pthread_kill; fixed in 241 // bionic commit 10c8ce59a (in JB and up; API level 16 = Android 4.1). 242 // Bug 1376653: musl also needs this, and security-wise it's harmless. 243 static intptr_t TKillCompatTrap(ArgsRef aArgs, void* aux) { 244 auto tid = static_cast<pid_t>(aArgs.args[0]); 245 auto sig = static_cast<int>(aArgs.args[1]); 246 return DoSyscall(__NR_tgkill, getpid(), tid, sig); 247 } 248 249 static intptr_t SetNoNewPrivsTrap(ArgsRef& aArgs, void* aux) { 250 if (gSetSandboxFilter == nullptr) { 251 // Called after BroadcastSetThreadSandbox finished, therefore 252 // not our doing and not expected. 253 return BlockedSyscallTrap(aArgs, nullptr); 254 } 255 // Signal that the filter is already in place. 256 return -ETXTBSY; 257 } 258 259 // Trap handlers for filesystem brokering. 260 // (The amount of code duplication here could be improved....) 
261 #ifdef __NR_open 262 static intptr_t OpenTrap(ArgsRef aArgs, void* aux) { 263 auto broker = static_cast<SandboxBrokerClient*>(aux); 264 auto path = reinterpret_cast<const char*>(aArgs.args[0]); 265 auto flags = static_cast<int>(aArgs.args[1]); 266 return broker->Open(path, flags); 267 } 268 269 static intptr_t AccessTrap(ArgsRef aArgs, void* aux) { 270 auto broker = static_cast<SandboxBrokerClient*>(aux); 271 auto path = reinterpret_cast<const char*>(aArgs.args[0]); 272 auto mode = static_cast<int>(aArgs.args[1]); 273 return broker->Access(path, mode); 274 } 275 276 static intptr_t StatTrap(ArgsRef aArgs, void* aux) { 277 auto broker = static_cast<SandboxBrokerClient*>(aux); 278 auto path = reinterpret_cast<const char*>(aArgs.args[0]); 279 auto buf = reinterpret_cast<statstruct*>(aArgs.args[1]); 280 return broker->Stat(path, buf); 281 } 282 283 static intptr_t LStatTrap(ArgsRef aArgs, void* aux) { 284 auto broker = static_cast<SandboxBrokerClient*>(aux); 285 auto path = reinterpret_cast<const char*>(aArgs.args[0]); 286 auto buf = reinterpret_cast<statstruct*>(aArgs.args[1]); 287 return broker->LStat(path, buf); 288 } 289 290 static intptr_t ChmodTrap(ArgsRef aArgs, void* aux) { 291 auto broker = static_cast<SandboxBrokerClient*>(aux); 292 auto path = reinterpret_cast<const char*>(aArgs.args[0]); 293 auto mode = static_cast<mode_t>(aArgs.args[1]); 294 return broker->Chmod(path, mode); 295 } 296 297 static intptr_t LinkTrap(ArgsRef aArgs, void* aux) { 298 auto broker = static_cast<SandboxBrokerClient*>(aux); 299 auto path = reinterpret_cast<const char*>(aArgs.args[0]); 300 auto path2 = reinterpret_cast<const char*>(aArgs.args[1]); 301 return broker->Link(path, path2); 302 } 303 304 static intptr_t SymlinkTrap(ArgsRef aArgs, void* aux) { 305 auto broker = static_cast<SandboxBrokerClient*>(aux); 306 auto path = reinterpret_cast<const char*>(aArgs.args[0]); 307 auto path2 = reinterpret_cast<const char*>(aArgs.args[1]); 308 return broker->Symlink(path, path2); 309 } 
310 311 static intptr_t RenameTrap(ArgsRef aArgs, void* aux) { 312 auto broker = static_cast<SandboxBrokerClient*>(aux); 313 auto path = reinterpret_cast<const char*>(aArgs.args[0]); 314 auto path2 = reinterpret_cast<const char*>(aArgs.args[1]); 315 return broker->Rename(path, path2); 316 } 317 318 static intptr_t MkdirTrap(ArgsRef aArgs, void* aux) { 319 auto broker = static_cast<SandboxBrokerClient*>(aux); 320 auto path = reinterpret_cast<const char*>(aArgs.args[0]); 321 auto mode = static_cast<mode_t>(aArgs.args[1]); 322 return broker->Mkdir(path, mode); 323 } 324 325 static intptr_t RmdirTrap(ArgsRef aArgs, void* aux) { 326 auto broker = static_cast<SandboxBrokerClient*>(aux); 327 auto path = reinterpret_cast<const char*>(aArgs.args[0]); 328 return broker->Rmdir(path); 329 } 330 331 static intptr_t UnlinkTrap(ArgsRef aArgs, void* aux) { 332 auto broker = static_cast<SandboxBrokerClient*>(aux); 333 auto path = reinterpret_cast<const char*>(aArgs.args[0]); 334 if (path && path[0] == '\0') { 335 // If the path is empty, then just fail the call here 336 return -ENOENT; 337 } 338 return broker->Unlink(path); 339 } 340 341 static intptr_t ReadlinkTrap(ArgsRef aArgs, void* aux) { 342 auto broker = static_cast<SandboxBrokerClient*>(aux); 343 auto path = reinterpret_cast<const char*>(aArgs.args[0]); 344 auto buf = reinterpret_cast<char*>(aArgs.args[1]); 345 auto size = static_cast<size_t>(aArgs.args[2]); 346 return broker->Readlink(path, buf, size); 347 } 348 #endif // __NR_open 349 350 static intptr_t OpenAtTrap(ArgsRef aArgs, void* aux) { 351 auto broker = static_cast<SandboxBrokerClient*>(aux); 352 auto fd = static_cast<int>(aArgs.args[0]); 353 auto path = reinterpret_cast<const char*>(aArgs.args[1]); 354 auto flags = static_cast<int>(aArgs.args[2]); 355 if (fd != AT_FDCWD && path[0] != '/') { 356 SANDBOX_LOG("unsupported fd-relative openat(%d, \"%s\", 0%o)", fd, path, 357 flags); 358 return BlockedSyscallTrap(aArgs, nullptr); 359 } 360 return broker->Open(path, 
flags); 361 } 362 363 static intptr_t AccessAtTrap(ArgsRef aArgs, void* aux) { 364 auto broker = static_cast<SandboxBrokerClient*>(aux); 365 auto fd = static_cast<int>(aArgs.args[0]); 366 auto path = reinterpret_cast<const char*>(aArgs.args[1]); 367 auto mode = static_cast<int>(aArgs.args[2]); 368 // Linux's faccessat syscall has no "flags" argument. Attempting 369 // to handle the flags != 0 case is left to userspace; this is 370 // impossible to do correctly in all cases, but that's not our 371 // problem. 372 // 373 // Starting with kernel 5.8+ and glibc 2.33, there is faccessat2 that 374 // supports flags, handled below. 375 if (fd != AT_FDCWD && path[0] != '/') { 376 SANDBOX_LOG("unsupported fd-relative faccessat(%d, \"%s\", %d)", fd, path, 377 mode); 378 return BlockedSyscallTrap(aArgs, nullptr); 379 } 380 return broker->Access(path, mode); 381 } 382 383 static intptr_t AccessAt2Trap(ArgsRef aArgs, void* aux) { 384 auto* broker = static_cast<SandboxBrokerClient*>(aux); 385 auto fd = static_cast<int>(aArgs.args[0]); 386 const auto* path = reinterpret_cast<const char*>(aArgs.args[1]); 387 auto mode = static_cast<int>(aArgs.args[2]); 388 auto flags = static_cast<int>(aArgs.args[3]); 389 if (fd != AT_FDCWD && path[0] != '/') { 390 SANDBOX_LOG("unsupported fd-relative faccessat2(%d, \"%s\", %d, %d)", fd, 391 path, mode, flags); 392 return BlockedSyscallTrap(aArgs, nullptr); 393 } 394 if ((flags & ~AT_EACCESS) == 0) { 395 return broker->Access(path, mode); 396 } 397 return ConvertError(ENOSYS); 398 } 399 400 static intptr_t StatAtTrap(ArgsRef aArgs, void* aux) { 401 auto broker = static_cast<SandboxBrokerClient*>(aux); 402 auto fd = static_cast<int>(aArgs.args[0]); 403 auto path = reinterpret_cast<const char*>(aArgs.args[1]); 404 auto buf = reinterpret_cast<statstruct*>(aArgs.args[2]); 405 auto flags = static_cast<int>(aArgs.args[3]); 406 407 if (fd != AT_FDCWD && (flags & AT_EMPTY_PATH) && path && 408 !strcmp(path, "")) { 409 #ifdef __NR_fstat64 410 return 
DoSyscall(__NR_fstat64, fd, buf); 411 #else 412 return DoSyscall(__NR_fstat, fd, buf); 413 #endif 414 } 415 416 if (!broker) { 417 return BlockedSyscallTrap(aArgs, nullptr); 418 } 419 420 if (fd != AT_FDCWD && path && path[0] != '/') { 421 SANDBOX_LOG("unsupported fd-relative fstatat(%d, \"%s\", %p, 0x%x)", fd, 422 path, buf, flags); 423 return BlockedSyscallTrap(aArgs, nullptr); 424 } 425 426 int badFlags = flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT); 427 if (badFlags != 0) { 428 SANDBOX_LOG("unsupported flags 0x%x in fstatat(%d, \"%s\", %p, 0x%x)", 429 badFlags, fd, path, buf, flags); 430 return BlockedSyscallTrap(aArgs, nullptr); 431 } 432 return (flags & AT_SYMLINK_NOFOLLOW) == 0 ? broker->Stat(path, buf) 433 : broker->LStat(path, buf); 434 } 435 436 static intptr_t ChmodAtTrap(ArgsRef aArgs, void* aux) { 437 auto broker = static_cast<SandboxBrokerClient*>(aux); 438 auto fd = static_cast<int>(aArgs.args[0]); 439 auto path = reinterpret_cast<const char*>(aArgs.args[1]); 440 auto mode = static_cast<mode_t>(aArgs.args[2]); 441 auto flags = static_cast<int>(aArgs.args[3]); 442 if (fd != AT_FDCWD && path[0] != '/') { 443 SANDBOX_LOG("unsupported fd-relative chmodat(%d, \"%s\", 0%o, %d)", fd, 444 path, mode, flags); 445 return BlockedSyscallTrap(aArgs, nullptr); 446 } 447 if (flags != 0) { 448 SANDBOX_LOG("unsupported flags in chmodat(%d, \"%s\", 0%o, %d)", fd, path, 449 mode, flags); 450 return BlockedSyscallTrap(aArgs, nullptr); 451 } 452 return broker->Chmod(path, mode); 453 } 454 455 static intptr_t LinkAtTrap(ArgsRef aArgs, void* aux) { 456 auto broker = static_cast<SandboxBrokerClient*>(aux); 457 auto fd = static_cast<int>(aArgs.args[0]); 458 auto path = reinterpret_cast<const char*>(aArgs.args[1]); 459 auto fd2 = static_cast<int>(aArgs.args[2]); 460 auto path2 = reinterpret_cast<const char*>(aArgs.args[3]); 461 auto flags = static_cast<int>(aArgs.args[4]); 462 if ((fd != AT_FDCWD && path[0] != '/') || 463 (fd2 != AT_FDCWD && path2[0] != '/')) { 464 
SANDBOX_LOG( 465 "unsupported fd-relative linkat(%d, \"%s\", %d, \"%s\", 0x%x)", fd, 466 path, fd2, path2, flags); 467 return BlockedSyscallTrap(aArgs, nullptr); 468 } 469 if (flags != 0) { 470 SANDBOX_LOG("unsupported flags in linkat(%d, \"%s\", %d, \"%s\", 0x%x)", 471 fd, path, fd2, path2, flags); 472 return BlockedSyscallTrap(aArgs, nullptr); 473 } 474 return broker->Link(path, path2); 475 } 476 477 static intptr_t SymlinkAtTrap(ArgsRef aArgs, void* aux) { 478 auto broker = static_cast<SandboxBrokerClient*>(aux); 479 auto path = reinterpret_cast<const char*>(aArgs.args[0]); 480 auto fd2 = static_cast<int>(aArgs.args[1]); 481 auto path2 = reinterpret_cast<const char*>(aArgs.args[2]); 482 if (fd2 != AT_FDCWD && path2[0] != '/') { 483 SANDBOX_LOG("unsupported fd-relative symlinkat(\"%s\", %d, \"%s\")", path, 484 fd2, path2); 485 return BlockedSyscallTrap(aArgs, nullptr); 486 } 487 return broker->Symlink(path, path2); 488 } 489 490 static intptr_t RenameAtTrap(ArgsRef aArgs, void* aux) { 491 auto broker = static_cast<SandboxBrokerClient*>(aux); 492 auto fd = static_cast<int>(aArgs.args[0]); 493 auto path = reinterpret_cast<const char*>(aArgs.args[1]); 494 auto fd2 = static_cast<int>(aArgs.args[2]); 495 auto path2 = reinterpret_cast<const char*>(aArgs.args[3]); 496 if ((fd != AT_FDCWD && path[0] != '/') || 497 (fd2 != AT_FDCWD && path2[0] != '/')) { 498 SANDBOX_LOG("unsupported fd-relative renameat(%d, \"%s\", %d, \"%s\")", 499 fd, path, fd2, path2); 500 return BlockedSyscallTrap(aArgs, nullptr); 501 } 502 return broker->Rename(path, path2); 503 } 504 505 static intptr_t MkdirAtTrap(ArgsRef aArgs, void* aux) { 506 auto broker = static_cast<SandboxBrokerClient*>(aux); 507 auto fd = static_cast<int>(aArgs.args[0]); 508 auto path = reinterpret_cast<const char*>(aArgs.args[1]); 509 auto mode = static_cast<mode_t>(aArgs.args[2]); 510 if (fd != AT_FDCWD && path[0] != '/') { 511 SANDBOX_LOG("unsupported fd-relative mkdirat(%d, \"%s\", 0%o)", fd, path, 512 mode); 513 return 
BlockedSyscallTrap(aArgs, nullptr); 514 } 515 return broker->Mkdir(path, mode); 516 } 517 518 static intptr_t UnlinkAtTrap(ArgsRef aArgs, void* aux) { 519 auto broker = static_cast<SandboxBrokerClient*>(aux); 520 auto fd = static_cast<int>(aArgs.args[0]); 521 auto path = reinterpret_cast<const char*>(aArgs.args[1]); 522 auto flags = static_cast<int>(aArgs.args[2]); 523 if (path && path[0] == '\0') { 524 // If the path is empty, then just fail the call here 525 return -ENOENT; 526 } 527 if (fd != AT_FDCWD && path[0] != '/') { 528 SANDBOX_LOG("unsupported fd-relative unlinkat(%d, \"%s\", 0x%x)", fd, 529 path, flags); 530 return BlockedSyscallTrap(aArgs, nullptr); 531 } 532 int badFlags = flags & ~AT_REMOVEDIR; 533 if (badFlags != 0) { 534 SANDBOX_LOG("unsupported flags 0x%x in unlinkat(%d, \"%s\", 0x%x)", 535 badFlags, fd, path, flags); 536 return BlockedSyscallTrap(aArgs, nullptr); 537 } 538 return (flags & AT_REMOVEDIR) == 0 ? broker->Unlink(path) 539 : broker->Rmdir(path); 540 } 541 542 static intptr_t ReadlinkAtTrap(ArgsRef aArgs, void* aux) { 543 auto broker = static_cast<SandboxBrokerClient*>(aux); 544 auto fd = static_cast<int>(aArgs.args[0]); 545 auto path = reinterpret_cast<const char*>(aArgs.args[1]); 546 auto buf = reinterpret_cast<char*>(aArgs.args[2]); 547 auto size = static_cast<size_t>(aArgs.args[3]); 548 if (fd != AT_FDCWD && path[0] != '/') { 549 SANDBOX_LOG("unsupported fd-relative readlinkat(%d, %s, %p, %d)", fd, 550 path, buf, size); 551 return BlockedSyscallTrap(aArgs, nullptr); 552 } 553 return broker->Readlink(path, buf, size); 554 } 555 556 static intptr_t SocketpairDatagramTrap(ArgsRef aArgs, void* aux) { 557 auto fds = reinterpret_cast<int*>(aArgs.args[3]); 558 // Return sequential packet sockets instead of the expected 559 // datagram sockets; see bug 1355274 for details. 
560 return ConvertError(socketpair(AF_UNIX, SOCK_SEQPACKET, 0, fds)); 561 } 562 563 static intptr_t SocketcallUnpackTrap(ArgsRef aArgs, void* aux) { 564 #ifdef __NR_socketcall 565 auto argsPtr = reinterpret_cast<const unsigned long*>(aArgs.args[1]); 566 int sysno = -1; 567 568 // When Linux added separate syscalls for socket operations on the 569 // old socketcall platforms, they had long since stopped adding 570 // send and recv syscalls, because they can be trivially mapped 571 // onto sendto and recvfrom (see also open vs. openat). 572 // 573 // But, socketcall itself *does* have separate calls for those. 574 // So, we need to remap them; since send(to) and recv(from) 575 // have basically the same types except for const, the code is 576 // factored out here. 577 unsigned long altArgs[6]; 578 auto legacySendRecvWorkaround = [&] { 579 MOZ_ASSERT(argsPtr != altArgs); 580 memcpy(altArgs, argsPtr, sizeof(unsigned long[4])); 581 altArgs[4] = altArgs[5] = 0; 582 argsPtr = altArgs; 583 }; 584 585 switch (aArgs.args[0]) { 586 // See also the other socketcall table in SandboxFilterUtil.cpp 587 # define DISPATCH_SOCKETCALL(this_sysno, this_call) \ 588 case this_call: \ 589 sysno = this_sysno; \ 590 break 591 592 DISPATCH_SOCKETCALL(__NR_socketpair, SYS_SOCKETPAIR); 593 DISPATCH_SOCKETCALL(__NR_getsockopt, SYS_GETSOCKOPT); 594 DISPATCH_SOCKETCALL(__NR_sendmsg, SYS_SENDMSG); 595 DISPATCH_SOCKETCALL(__NR_recvmsg, SYS_RECVMSG); 596 DISPATCH_SOCKETCALL(__NR_sendto, SYS_SENDTO); 597 DISPATCH_SOCKETCALL(__NR_recvfrom, SYS_RECVFROM); 598 DISPATCH_SOCKETCALL(__NR_sendmmsg, SYS_SENDMMSG); 599 DISPATCH_SOCKETCALL(__NR_recvmmsg, SYS_RECVMMSG); 600 // __NR_recvmmsg_time64 is not available as a socketcall; a 601 // Y2K38-ready userland would call it directly. 
602 # undef DISPATCH_SOCKETCALL 603 604 case SYS_SEND: 605 sysno = __NR_sendto; 606 legacySendRecvWorkaround(); 607 break; 608 case SYS_RECV: 609 sysno = __NR_recvfrom; 610 legacySendRecvWorkaround(); 611 break; 612 } 613 614 // This assert will fail if someone tries to map a socketcall to 615 // this trap without adding it to the switch statement above. 616 MOZ_RELEASE_ASSERT(sysno >= 0); 617 618 return DoSyscall(sysno, argsPtr[0], argsPtr[1], argsPtr[2], argsPtr[3], 619 argsPtr[4], argsPtr[5]); 620 621 #else // no socketcall 622 MOZ_CRASH("unreachable?"); 623 return -ENOSYS; 624 #endif 625 } 626 627 // This just needs to return something to stand in for the 628 // unconnected socket until ConnectTrap, below, and keep track of 629 // the socket type somehow. Half a socketpair *is* a socket, so it 630 // should result in minimal confusion in the caller. 631 static intptr_t FakeSocketTrapCommon(int domain, int type, int protocol) { 632 int fds[2]; 633 // X11 client libs will still try to getaddrinfo() even for a 634 // local connection. Also, WebRTC still has vestigial network 635 // code trying to do things in the content process. Politely tell 636 // them no. 
637 if (domain != AF_UNIX) { 638 return -EAFNOSUPPORT; 639 } 640 if (socketpair(domain, type, protocol, fds) != 0) { 641 return -errno; 642 } 643 close(fds[1]); 644 return fds[0]; 645 } 646 647 static intptr_t FakeSocketTrap(ArgsRef aArgs, void* aux) { 648 return FakeSocketTrapCommon(static_cast<int>(aArgs.args[0]), 649 static_cast<int>(aArgs.args[1]), 650 static_cast<int>(aArgs.args[2])); 651 } 652 653 static intptr_t FakeSocketTrapLegacy(ArgsRef aArgs, void* aux) { 654 const auto innerArgs = reinterpret_cast<unsigned long*>(aArgs.args[1]); 655 656 return FakeSocketTrapCommon(static_cast<int>(innerArgs[0]), 657 static_cast<int>(innerArgs[1]), 658 static_cast<int>(innerArgs[2])); 659 } 660 661 static Maybe<int> DoGetSockOpt(int fd, int optname) { 662 int optval; 663 socklen_t optlen = sizeof(optval); 664 665 if (getsockopt(fd, SOL_SOCKET, optname, &optval, &optlen) != 0) { 666 return Nothing(); 667 } 668 MOZ_RELEASE_ASSERT(static_cast<size_t>(optlen) == sizeof(optval)); 669 return Some(optval); 670 } 671 672 // Substitute the newly connected socket from the broker for the 673 // original socket. This is meant to be used on a fd from 674 // FakeSocketTrap, above, but it should also work to simulate 675 // re-connect()ing a real connected socket. 676 // 677 // Warning: This isn't quite right if the socket is dup()ed, because 678 // other duplicates will still be the original socket, but hopefully 679 // nothing we're dealing with does that. 
680 static intptr_t ConnectTrapCommon(SandboxBrokerClient* aBroker, int aFd, 681 const struct sockaddr_un* aAddr, 682 socklen_t aLen) { 683 if (aFd < 0) { 684 return -EBADF; 685 } 686 const auto maybeDomain = DoGetSockOpt(aFd, SO_DOMAIN); 687 if (!maybeDomain) { 688 return -errno; 689 } 690 if (*maybeDomain != AF_UNIX) { 691 return -EAFNOSUPPORT; 692 } 693 const auto maybeType = DoGetSockOpt(aFd, SO_TYPE); 694 if (!maybeType) { 695 return -errno; 696 } 697 const int oldFlags = fcntl(aFd, F_GETFL); 698 if (oldFlags == -1) { 699 return -errno; 700 } 701 const int newFd = aBroker->Connect(aAddr, aLen, *maybeType); 702 if (newFd < 0) { 703 return newFd; 704 } 705 // Copy over the nonblocking flag. The connect() won't be 706 // nonblocking in that case, but that shouldn't matter for 707 // AF_UNIX. The other fcntl-settable flags are either irrelevant 708 // for sockets (e.g., O_APPEND) or would be blocked by this 709 // seccomp-bpf policy, so they're ignored. 710 if (fcntl(newFd, F_SETFL, oldFlags & O_NONBLOCK) != 0) { 711 close(newFd); 712 return -errno; 713 } 714 if (dup2(newFd, aFd) < 0) { 715 close(newFd); 716 return -errno; 717 } 718 close(newFd); 719 return 0; 720 } 721 722 static intptr_t ConnectTrap(ArgsRef aArgs, void* aux) { 723 typedef const struct sockaddr_un* AddrPtr; 724 725 return ConnectTrapCommon(static_cast<SandboxBrokerClient*>(aux), 726 static_cast<int>(aArgs.args[0]), 727 reinterpret_cast<AddrPtr>(aArgs.args[1]), 728 static_cast<socklen_t>(aArgs.args[2])); 729 } 730 731 static intptr_t ConnectTrapLegacy(ArgsRef aArgs, void* aux) { 732 const auto innerArgs = reinterpret_cast<unsigned long*>(aArgs.args[1]); 733 typedef const struct sockaddr_un* AddrPtr; 734 735 return ConnectTrapCommon(static_cast<SandboxBrokerClient*>(aux), 736 static_cast<int>(innerArgs[0]), 737 reinterpret_cast<AddrPtr>(innerArgs[1]), 738 static_cast<socklen_t>(innerArgs[2])); 739 } 740 741 static intptr_t StatFsTrap(ArgsRef aArgs, void* aux) { 742 // Warning: the kernel interface 
is not the C interface. The 743 // structs are different (<asm/statfs.h> vs. <sys/statfs.h>), and 744 // the statfs64 version takes an additional size parameter. 745 auto path = reinterpret_cast<const char*>(aArgs.args[0]); 746 int fd = open(path, O_RDONLY | O_LARGEFILE); 747 if (fd < 0) { 748 return -errno; 749 } 750 751 intptr_t rv; 752 switch (aArgs.nr) { 753 case __NR_statfs: { 754 auto buf = reinterpret_cast<void*>(aArgs.args[1]); 755 rv = DoSyscall(__NR_fstatfs, fd, buf); 756 break; 757 } 758 #ifdef __NR_statfs64 759 case __NR_statfs64: { 760 auto sz = static_cast<size_t>(aArgs.args[1]); 761 auto buf = reinterpret_cast<void*>(aArgs.args[2]); 762 rv = DoSyscall(__NR_fstatfs64, fd, sz, buf); 763 break; 764 } 765 #endif 766 default: 767 MOZ_ASSERT(false); 768 rv = -ENOSYS; 769 } 770 771 close(fd); 772 return rv; 773 } 774 775 public: 776 ResultExpr InvalidSyscall() const override { 777 return Trap(BlockedSyscallTrap, nullptr); 778 } 779 780 virtual ResultExpr ClonePolicy(ResultExpr failPolicy) const { 781 // Allow use for simple thread creation (pthread_create) only. 782 783 // WARNING: s390 and cris pass the flags in the second arg -- see 784 // CLONE_BACKWARDS2 in arch/Kconfig in the kernel source -- but we 785 // don't support seccomp-bpf on those archs yet. 786 Arg<int> flags(0); 787 788 // The exact flags used can vary. CLONE_DETACHED is used by musl 789 // and by old versions of Android (<= JB 4.2), but it's been 790 // ignored by the kernel since the beginning of the Git history. 791 // 792 // If we ever need to support Android <= KK 4.4 again, SETTLS 793 // and the *TID flags will need to be made optional. 
static const int flags_required =
        CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD |
        CLONE_SYSVSEM | CLONE_SETTLS | CLONE_PARENT_SETTID |
        CLONE_CHILD_CLEARTID;
    static const int flags_optional = CLONE_DETACHED;

    return If((flags & ~flags_optional) == flags_required, Allow())
        .Else(failPolicy);
  }

  // Builds the policy for prctl(2), dispatching on the operation
  // (argument 0).  Operations not listed here are treated as invalid
  // syscalls.  Subclasses that need additional operations override
  // this method (see the warning in the __NR_prctl case of
  // EvaluateSyscall).
  virtual ResultExpr PrctlPolicy() const {
    Arg<int> op(0);
    Arg<int> arg2(1);
    return Switch(op)
        .Case(PR_SET_VMA,  // Tagging of anonymous memory mappings
              If(arg2 == PR_SET_VMA_ANON_NAME, Allow()).Else(InvalidSyscall()))
        .Cases({PR_GET_SECCOMP,   // BroadcastSetThreadSandbox, etc.
                PR_SET_NAME,      // Thread creation
                PR_SET_DUMPABLE,  // Crash reporting
                PR_SET_PTRACER},  // Debug-mode crash handling
               Allow())
        .Case(PR_CAPBSET_READ,  // libcap.so.2 loaded by libpulse.so.0
                                // queries for capabilities
              Error(EINVAL))
#if defined(MOZ_PROFILE_GENERATE)
        .Case(PR_GET_PDEATHSIG, Allow())
#endif  // defined(MOZ_PROFILE_GENERATE)
        .Default(InvalidSyscall());
  }

  // Returns an expression that is true when the send/recv flags
  // argument contains only bits from the allowed set below.
  virtual BoolExpr MsgFlagsAllowed(const Arg<int>& aFlags) const {
    // MSG_DONTWAIT: used by IPC
    // MSG_NOSIGNAL: used by the sandbox (broker, reporter)
    // MSG_CMSG_CLOEXEC: should be used by anything that's passed fds
    static constexpr int kNeeded =
        MSG_DONTWAIT | MSG_NOSIGNAL | MSG_CMSG_CLOEXEC;

    // These don't appear to be used in our code at the moment, but
    // they seem low-risk enough to allow to avoid the possibility of
    // breakage.  (Necko might use MSG_PEEK, but the socket process
    // overrides this method.)
    static constexpr int kHarmless = MSG_PEEK | MSG_WAITALL | MSG_TRUNC;

    static constexpr int kAllowed = kNeeded | kHarmless;
    return (aFlags & ~kAllowed) == 0;
  }

  // Fallback used by EvaluateSocketCall when the call's arguments are
  // not directly filterable: trap to SocketcallUnpackTrap if separate
  // socket syscalls exist, otherwise allow the call unfiltered.
  static ResultExpr UnpackSocketcallOrAllow() {
    // See bug 1066750.
    if (HasSeparateSocketCalls()) {
      // If this is a socketcall(2) platform, but the kernel also
      // supports separate syscalls (>= 4.3.0), we can unpack the
      // arguments and filter them.
      return Trap(SocketcallUnpackTrap, nullptr);
    }
    // Otherwise, we can't filter the args if the platform passes
    // them by pointer.
    return Allow();
  }

  // Policy for socket-related calls, keyed by the SYS_* socketcall
  // number.  Returns Nothing() for calls this class has no opinion on.
  // NOTE(review): aHasArgs presumably means the call's arguments are
  // visible as syscall arguments (rather than passed by pointer via
  // socketcall(2)); this is inferred from the comments in
  // UnpackSocketcallOrAllow -- confirm against the base class.
  Maybe<ResultExpr> EvaluateSocketCall(int aCall,
                                       bool aHasArgs) const override {
    switch (aCall) {
      case SYS_RECVMSG:
      case SYS_SENDMSG:
        if (aHasArgs) {
          // For recvmsg/sendmsg the flags are the third argument.
          Arg<int> flags(2);
          return Some(
              If(MsgFlagsAllowed(flags), Allow()).Else(InvalidSyscall()));
        }
        return Some(UnpackSocketcallOrAllow());

        // These next four weren't needed for IPC or other core
        // functionality when they were added, but they're subsets of
        // recvmsg/sendmsg so there's nothing gained by not allowing
        // them here (and simplifying subclasses).  Also, there may be
        // unknown dependencies on them now.
      case SYS_RECVFROM:
      case SYS_SENDTO:
      case SYS_RECV:
      case SYS_SEND:
        if (aHasArgs) {
          // For these calls the flags are the fourth argument.
          Arg<int> flags(3);
          return Some(
              If(MsgFlagsAllowed(flags), Allow()).Else(InvalidSyscall()));
        }
        return Some(UnpackSocketcallOrAllow());

      case SYS_SOCKETPAIR: {
        // We try to allow "safe" (always connected) socketpairs when using the
        // file broker, or for content processes, but we may need to fall back
        // and allow all socketpairs in some cases, see bug 1066750.
        if (!mBroker && !mAllowUnsafeSocketPair) {
          return Nothing();
        }
        if (!aHasArgs) {
          return Some(UnpackSocketcallOrAllow());
        }
        Arg<int> domain(0), type(1);
        return Some(
            If(domain == AF_UNIX,
               // SOCK_CLOEXEC and SOCK_NONBLOCK are masked off before
               // the socket type is compared.
               Switch(type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
                   .Case(SOCK_STREAM, Allow())
                   .Case(SOCK_SEQPACKET, Allow())
                   // This is used only by content (and only for
                   // direct PulseAudio, which is deprecated) but it
                   // doesn't increase attack surface:
                   .Case(SOCK_DGRAM, Trap(SocketpairDatagramTrap, nullptr))
                   .Default(InvalidSyscall()))
                .Else(InvalidSyscall()));
      }

      case SYS_GETSOCKOPT: {
        // Best-effort argument filtering as for socketpair(2), above.
        if (!aHasArgs) {
          if (HasSeparateSocketCalls()) {
            return Some(Trap(SocketcallUnpackTrap, nullptr));
          }
          return Some(Allow());
        }
        Arg<int> level(1), optname(2);
        // SO_SNDBUF is used by IPC to avoid constructing
        // unnecessarily large gather arrays for `sendmsg`.
        //
        // SO_DOMAIN and SO_TYPE are needed for connect() brokering,
        // but they're harmless even when it's not enabled.
        return Some(If(AllOf(level == SOL_SOCKET,
                             AnyOf(optname == SO_SNDBUF, optname == SO_DOMAIN,
                                   optname == SO_TYPE)),
                       Allow())
                        .Else(InvalidSyscall()));
      }

      // These two cases are for connect() brokering, if enabled.
      case SYS_SOCKET:
        if (mBrokeredConnect) {
          const auto trapFn = aHasArgs ? FakeSocketTrap : FakeSocketTrapLegacy;
          MOZ_ASSERT(mBroker);
          return Some(Trap(trapFn, mBroker));
        }
        return Nothing();

      case SYS_CONNECT:
        if (mBrokeredConnect) {
          const auto trapFn = aHasArgs ? ConnectTrap : ConnectTrapLegacy;
          MOZ_ASSERT(mBroker);
          return Some(Trap(trapFn, mBroker));
        }
        return Nothing();

      default:
        return Nothing();
    }
  }

  // The syscall policy shared by all process types.  Filesystem
  // syscalls are handled first (trapped to the broker when one is
  // present); everything else goes through the main switch below, and
  // anything not listed falls back to SandboxPolicyBase.
  ResultExpr EvaluateSyscall(int sysno) const override {
    // If a file broker client was provided, route syscalls to it;
    // otherwise, fall through to the main policy, which will deny
    // them.
    if (mBroker) {
      switch (sysno) {
#ifdef __NR_open
        case __NR_open:
          return Trap(OpenTrap, mBroker);
        case __NR_access:
          return Trap(AccessTrap, mBroker);
        CASES_FOR_stat:
          return Trap(StatTrap, mBroker);
        CASES_FOR_lstat:
          return Trap(LStatTrap, mBroker);
        case __NR_chmod:
          return Trap(ChmodTrap, mBroker);
        case __NR_link:
          return Trap(LinkTrap, mBroker);
        case __NR_mkdir:
          return Trap(MkdirTrap, mBroker);
        case __NR_symlink:
          return Trap(SymlinkTrap, mBroker);
        case __NR_rename:
          return Trap(RenameTrap, mBroker);
        case __NR_rmdir:
          return Trap(RmdirTrap, mBroker);
        case __NR_unlink:
          return Trap(UnlinkTrap, mBroker);
        case __NR_readlink:
          return Trap(ReadlinkTrap, mBroker);
#endif
        case __NR_openat:
          return Trap(OpenAtTrap, mBroker);
        case __NR_faccessat:
          return Trap(AccessAtTrap, mBroker);
        case __NR_faccessat2:
          return Trap(AccessAt2Trap, mBroker);
        CASES_FOR_fstatat:
          return Trap(StatAtTrap, mBroker);
        // Used by new libc and Rust's stdlib, if available.
        // We don't have broker support yet so claim it does not exist.
        case __NR_statx:
          return Error(ENOSYS);
        case __NR_fchmodat:
          return Trap(ChmodAtTrap, mBroker);
        case __NR_linkat:
          return Trap(LinkAtTrap, mBroker);
        case __NR_mkdirat:
          return Trap(MkdirAtTrap, mBroker);
        case __NR_symlinkat:
          return Trap(SymlinkAtTrap, mBroker);
        case __NR_renameat:
          return Trap(RenameAtTrap, mBroker);
        case __NR_unlinkat:
          return Trap(UnlinkAtTrap, mBroker);
        case __NR_readlinkat:
          return Trap(ReadlinkAtTrap, mBroker);
      }
    } else {
      // In the absence of a broker we still need to handle the
      // fstat-equivalent subset of fstatat; see bug 1673770.
      switch (sysno) {
        // statx may be used for fstat (bug 1867673)
        case __NR_statx:
          return Error(ENOSYS);
        CASES_FOR_fstatat:
          return Trap(StatAtTrap, nullptr);
      }
    }

    switch (sysno) {
        // Timekeeping
        //
        // (Note: the switch needs to start with a literal case, not a
        // macro; otherwise clang-format gets confused.)
      case __NR_gettimeofday:
#ifdef __NR_time
      case __NR_time:
#endif
      case __NR_nanosleep:
        return Allow();

      CASES_FOR_clock_gettime:
      CASES_FOR_clock_getres:
      CASES_FOR_clock_nanosleep: {
        // clockid_t can encode a pid or tid to monitor another
        // process or thread's CPU usage (see CPUCLOCK_PID and related
        // definitions in include/linux/posix-timers.h in the kernel
        // source).  For threads, the kernel allows only tids within
        // the calling process, so it isn't a problem if we don't
        // filter those; pids do need to be restricted to the current
        // process in order to not leak information.
        Arg<clockid_t> clk_id(0);
#ifdef MOZ_GECKO_PROFILER
        clockid_t this_process =
            MAKE_PROCESS_CPUCLOCK(getpid(), CPUCLOCK_SCHED);
#endif
        return If(clk_id == CLOCK_MONOTONIC, Allow())
#ifdef CLOCK_MONOTONIC_COARSE
            // Used by SandboxReporter, among other things.
            .ElseIf(clk_id == CLOCK_MONOTONIC_COARSE, Allow())
#endif
#ifdef CLOCK_MONOTONIC_RAW
            .ElseIf(clk_id == CLOCK_MONOTONIC_RAW, Allow())
#endif
            .ElseIf(clk_id == CLOCK_PROCESS_CPUTIME_ID, Allow())
            .ElseIf(clk_id == CLOCK_REALTIME, Allow())
#ifdef CLOCK_REALTIME_COARSE
            .ElseIf(clk_id == CLOCK_REALTIME_COARSE, Allow())
#endif
            .ElseIf(clk_id == CLOCK_THREAD_CPUTIME_ID, Allow())
#ifdef MOZ_GECKO_PROFILER
            // Allow clock_gettime on the same process.
            .ElseIf(clk_id == this_process, Allow())
            // Allow clock_gettime on a thread.
            .ElseIf((clk_id & 7u) == (CPUCLOCK_PERTHREAD_MASK | CPUCLOCK_SCHED),
                    Allow())
#endif
#ifdef CLOCK_BOOTTIME
            .ElseIf(clk_id == CLOCK_BOOTTIME, Allow())
#endif
            .Else(InvalidSyscall());
      }

        // Thread synchronization
      CASES_FOR_futex:
        // FIXME(bug 1441993): This could be more restrictive.
        return Allow();

        // Asynchronous I/O
      CASES_FOR_epoll_create:
      CASES_FOR_epoll_wait:
      case __NR_epoll_ctl:
      CASES_FOR_poll:
        return Allow();

        // Used when requesting a crash dump.
      CASES_FOR_pipe:
        return Allow();

        // Metadata of opened files
      CASES_FOR_fstat:
        return Allow();

      CASES_FOR_fcntl: {
        Arg<int> cmd(1);
        Arg<int> flags(2);
        // Typical use of F_SETFL is to modify the flags returned by
        // F_GETFL and write them back, including some flags that
        // F_SETFL ignores.  This is a default-deny policy in case any
        // new SETFL-able flags are added.  (In particular we want to
        // forbid O_ASYNC; see bug 1328896, but also see bug 1408438.)
        static const int ignored_flags =
            O_ACCMODE | O_LARGEFILE_REAL | O_CLOEXEC | FMODE_NONOTIFY;
        static const int allowed_flags = ignored_flags | O_APPEND | O_NONBLOCK;
        return Switch(cmd)
            // Close-on-exec is meaningless when execve isn't allowed, but
            // NSPR reads the bit and asserts that it has the expected value.
            .Case(F_GETFD, Allow())
            .Case(
                F_SETFD,
                If((flags & ~FD_CLOEXEC) == 0, Allow()).Else(InvalidSyscall()))
            // F_GETFL is also used by fdopen
            .Case(F_GETFL, Allow())
            .Case(F_SETFL, If((flags & ~allowed_flags) == 0, Allow())
                               .Else(InvalidSyscall()))
#if defined(MOZ_PROFILE_GENERATE)
            .Case(F_SETLKW, Allow())
#endif
            // Not much different from other forms of dup(), and commonly used.
            .Case(F_DUPFD_CLOEXEC, Allow())
            // Used by Mesa, generally useful, and harmless: tests if
            // two file descriptors refer to the same file description.
            .Case(F_DUPFD_QUERY, Allow())
            .Default(SandboxPolicyBase::EvaluateSyscall(sysno));
      }

        // Simple I/O
      case __NR_pread64:
      case __NR_write:
      case __NR_read:
      case __NR_readv:
      case __NR_writev:  // see SandboxLogging.cpp
      CASES_FOR_lseek:
        return Allow();

      CASES_FOR_getdents:
        return Allow();

      CASES_FOR_ftruncate:
      case __NR_fallocate:
        return mMayCreateShmem ? Allow() : InvalidSyscall();

        // Used by our fd/shm classes
      case __NR_dup:
        return Allow();

        // Memory mapping
      CASES_FOR_mmap: {
        Arg<int> flags(3);
        // Explicit huge-page mapping has a history of bugs, and
        // generally isn't used outside of server applications.
        static constexpr int kBadFlags =
            MAP_HUGETLB | (MAP_HUGE_MASK << MAP_HUGE_SHIFT);
        // ENOSYS seems to be what the kernel would return if
        // CONFIG_HUGETLBFS=n.  (This uses Error rather than
        // InvalidSyscall because the latter would crash on Nightly,
        // and I don't think those reports would be actionable.)
        return If((flags & kBadFlags) != 0, Error(ENOSYS)).Else(Allow());
      }
      case __NR_munmap:
        return Allow();

        // Shared memory
      case __NR_memfd_create: {
        Arg<unsigned> flags(1);
        // See above about mmap MAP_HUGETLB.
        static constexpr int kBadFlags =
            MFD_HUGETLB | (MFD_HUGE_MASK << MFD_HUGE_SHIFT);
        return If((flags & kBadFlags) != 0, Error(ENOSYS)).Else(Allow());
      }

        // ipc::Shmem; also, glibc when creating threads:
      case __NR_mprotect:
        return Allow();

#if !defined(MOZ_MEMORY)
        // No jemalloc means using a system allocator like glibc
        // that might use brk.
      case __NR_brk:
        return Allow();

        // Similarly, mremap (bugs: 1047620, 1286119, 1860267)
      case __NR_mremap: {
        Arg<int> flags(3);
        return If((flags & ~MREMAP_MAYMOVE) == 0, Allow())
            .Else(SandboxPolicyBase::EvaluateSyscall(sysno));
      }
#endif

        // madvise hints used by malloc; see bug 1303813 and bug 1364533
      case __NR_madvise: {
        Arg<int> advice(2);
        // The GMP specific sandbox duplicates this logic, so when adding
        // allowed values here also add them to the GMP sandbox rules.
        return If(advice == MADV_DONTNEED, Allow())
            .ElseIf(advice == MADV_FREE, Allow())
            // Used by glibc (and maybe someday mozjemalloc).
            .ElseIf(advice == MADV_GUARD_INSTALL, Allow())
            .ElseIf(advice == MADV_GUARD_REMOVE, Allow())
            // Formerly used by mozjemalloc; unclear if current use:
            .ElseIf(advice == MADV_HUGEPAGE, Allow())
            .ElseIf(advice == MADV_NOHUGEPAGE, Allow())
#ifdef MOZ_ASAN
            .ElseIf(advice == MADV_DONTDUMP, Allow())
#endif
            .ElseIf(advice == MADV_MERGEABLE, Error(EPERM))  // bug 1705045
            .Else(InvalidSyscall());
      }

        // musl libc will set this up in pthreads support.
      case __NR_membarrier:
        return Allow();

        // Signal handling
      case __NR_sigaltstack:
      CASES_FOR_sigreturn:
      CASES_FOR_sigprocmask:
      CASES_FOR_sigaction:
        return Allow();

        // Send signals within the process (raise(), profiling, etc.)
      case __NR_tgkill: {
        Arg<pid_t> tgid(0);
        return If(tgid == getpid(), Allow()).Else(InvalidSyscall());
      }

        // Polyfill with tgkill; see above.
      case __NR_tkill:
        return Trap(TKillCompatTrap, nullptr);

        // Yield
      case __NR_sched_yield:
        return Allow();

        // Thread creation.
      case __NR_clone:
        return ClonePolicy(InvalidSyscall());

      case __NR_clone3:
        return Error(ENOSYS);

        // More thread creation.
#ifdef __NR_set_robust_list
      case __NR_set_robust_list:
        return Allow();
#endif
#ifdef ANDROID
      case __NR_set_tid_address:
        return Allow();
#endif

        // prctl
      case __NR_prctl: {
        // WARNING: do not handle __NR_prctl directly in subclasses;
        // override PrctlPolicy instead.  The special handling of
        // PR_SET_NO_NEW_PRIVS is used to detect that a thread already
        // has the policy applied; see also bug 1257361.

        if (SandboxInfo::Get().Test(SandboxInfo::kHasSeccompTSync)) {
          return PrctlPolicy();
        }

        Arg<int> option(0);
        return If(option == PR_SET_NO_NEW_PRIVS,
                  Trap(SetNoNewPrivsTrap, nullptr))
            .Else(PrctlPolicy());
      }

#if defined(GP_PLAT_amd64_linux) && defined(GP_ARCH_amd64) && \
    defined(MOZ_USING_WASM_SANDBOXING)
        // arch_prctl
      case __NR_arch_prctl: {
        // Bug 1923701 - Needed by RLBox-wasm2c: Buggy libraries are
        // sandboxed with RLBox and wasm2c (Wasm). wasm2c offers an optimization
        // for performance that uses the otherwise-unused GS register on x86.
        // The GS register is only settable using arch_prctl on
        // older x86 CPUs that don't have the wrgsbase instruction. This
        // optimization is currently only supported on linux+clang+x86_64.
        Arg<int> op(0);
        return If(op == ARCH_SET_GS, Allow())
            .Else(SandboxPolicyBase::EvaluateSyscall(sysno));
      }
#endif

        // NSPR can call this when creating a thread, but it will accept a
        // polite "no".
      case __NR_getpriority:
        // But if thread creation races with sandbox startup, that call
        // could succeed, and then we get one of these:
      case __NR_setpriority:
        return Error(EACCES);

        // Stack bounds are obtained via pthread_getattr_np, which calls
        // this but doesn't actually need it:
      case __NR_sched_getaffinity:
        return Error(ENOSYS);

        // Identifies the processor and node where this thread or process is
        // running. This is used by "Awake" profiler markers.
      case __NR_getcpu:
        return Allow();

        // Read own pid/tid.
      case __NR_getpid:
      case __NR_gettid:
        return Allow();

        // Discard capabilities
      case __NR_close:
        return Allow();

        // Machine-dependent stuff
#ifdef __arm__
      case __ARM_NR_breakpoint:
      case __ARM_NR_cacheflush:
      case __ARM_NR_usr26:  // FIXME: do we actually need this?
      case __ARM_NR_usr32:
      case __ARM_NR_set_tls:
        return Allow();
#endif

        // Needed when being debugged:
      case __NR_restart_syscall:
        return Allow();

        // Terminate threads or the process
      case __NR_exit:
      case __NR_exit_group:
        return Allow();

      case __NR_getrandom:
        return Allow();

        // Used by almost every process: GMP needs them for Clearkey
        // because of bug 1576006 (but may not need them for other
        // plugin types; see bug 1737092).  Given that fstat is
        // allowed, the uid/gid are probably available anyway.
      CASES_FOR_getuid:
      CASES_FOR_getgid:
      CASES_FOR_geteuid:
      CASES_FOR_getegid:
        return Allow();

#ifdef DESKTOP
        // Bug 1543858: glibc's qsort calls sysinfo to check the
        // memory size; it falls back to assuming there's enough RAM.
      case __NR_sysinfo:
        return Error(EPERM);
#endif

        // Bug 1651701: an API for restartable atomic sequences and
        // per-CPU data; exposing information about CPU numbers and
        // when threads are migrated or preempted isn't great but the
        // risk should be relatively low.
      case __NR_rseq:
        return Allow();

      case __NR_ioctl: {
        Arg<unsigned long> request(1);
#ifdef MOZ_ASAN
        Arg<int> fd(0);
#endif  // MOZ_ASAN
        // Make isatty() return false, because none of the terminal
        // ioctls will be allowed; libraries sometimes call this for
        // various reasons (e.g., to decide whether to emit ANSI/VT
        // color codes when logging to stderr).  glibc uses TCGETS and
        // musl uses TIOCGWINSZ.
        //
        // This is required by ffmpeg
        return If(AnyOf(request == TCGETS, request == TIOCGWINSZ,
                        request == TCGETS2),
                  Error(ENOTTY))
#ifdef MOZ_ASAN
            // ASAN's error reporter wants to know if stderr is a tty.
            .ElseIf(fd == STDERR_FILENO, Error(ENOTTY))
#endif  // MOZ_ASAN
            .Else(SandboxPolicyBase::EvaluateSyscall(sysno));
      }

      CASES_FOR_dup2:  // See ConnectTrapCommon
        if (mBrokeredConnect) {
          return Allow();
        }
        return SandboxPolicyBase::EvaluateSyscall(sysno);

#ifdef MOZ_ASAN
        // ...and before compiler-rt r209773, it will call readlink on
        // /proc/self/exe and use the cached value only if that fails:
      case __NR_readlink:
      case __NR_readlinkat:
        return Error(ENOENT);

        // ...and if it found an external symbolizer, it will try to run it:
        // (See also bug 1081242 comment #7.)
      CASES_FOR_stat:
        return Error(ENOENT);
#endif  // MOZ_ASAN

        // Replace statfs with open (which may be brokered) and
        // fstatfs (which is not allowed in this policy, but may be
        // allowed by subclasses if they wish to enable statfs).
      CASES_FOR_statfs:
        return Trap(StatFsTrap, nullptr);

        // GTK's theme parsing tries to getcwd() while sandboxed, but
        // only during Talos runs.
        // Also, Rust panics call getcwd to try to print relative paths
        // in backtraces.
      case __NR_getcwd:
        return Error(ENOENT);

        // Basically every process type ends up using this for some
        // reason (nsSystemInfo in content, Mesa in RDD, bug 1992904 for
        // utility, etc.).  Other than GMP, which overrides this (see
        // below), it's relatively safe to expose this information.
case __NR_uname:
        return Allow();

      default:
        return SandboxPolicyBase::EvaluateSyscall(sysno);
    }
  }
};

// The process-type-specific syscall rules start here:

// The seccomp-bpf filter for content processes is not a true sandbox
// on its own; its purpose is attack surface reduction and syscall
// interception in support of a semantic sandboxing layer.  On B2G
// this is the Android process permission model; on desktop,
// namespaces and chroot() will be used.
class ContentSandboxPolicy : public SandboxPolicyCommon {
 private:
  ContentProcessSandboxParams mParams;
  // True when MOZ_SANDBOX_ALLOW_SYSV is set in the environment.
  bool mAllowSysV;
  // True when RENDERDOC_CAPTUREOPTS is set in the environment.
  bool mUsingRenderDoc;

  // True if the configured sandbox level is strictly below aLevel.
  bool BelowLevel(int aLevel) const { return mParams.mLevel < aLevel; }
  // Allow() when below aLevel; otherwise the supplied fallback.
  ResultExpr AllowBelowLevel(int aLevel, ResultExpr aOrElse) const {
    return BelowLevel(aLevel) ? Allow() : std::move(aOrElse);
  }
  // As above, with InvalidSyscall() as the fallback.
  ResultExpr AllowBelowLevel(int aLevel) const {
    return AllowBelowLevel(aLevel, InvalidSyscall());
  }

  static intptr_t GetPPidTrap(ArgsRef aArgs, void* aux) {
    // In a pid namespace, getppid() will return 0. We will return 0 instead
    // of the real parent pid to see what breaks when we introduce the
    // pid namespace (Bug 1151624).
    return 0;
  }

 public:
  // Takes ownership of the parameters by move; reads the two
  // environment overrides once, at construction, and enables shmem
  // creation, unsafe socketpairs and brokered connect for content.
  ContentSandboxPolicy(SandboxBrokerClient* aBroker,
                       ContentProcessSandboxParams&& aParams)
      : mParams(std::move(aParams)),
        mAllowSysV(PR_GetEnv("MOZ_SANDBOX_ALLOW_SYSV") != nullptr),
        mUsingRenderDoc(PR_GetEnv("RENDERDOC_CAPTUREOPTS") != nullptr) {
    mBroker = aBroker;
    mMayCreateShmem = true;
    mAllowUnsafeSocketPair = true;
    mBrokeredConnect = true;
  }

  ~ContentSandboxPolicy() override = default;

  // Content-specific socket call policy; anything not handled here is
  // delegated to SandboxPolicyCommon.
  Maybe<ResultExpr> EvaluateSocketCall(int aCall,
                                       bool aHasArgs) const override {
    switch (aCall) {
#ifdef ANDROID
      case SYS_SOCKET:
        return Some(Error(EACCES));
#else  // #ifdef DESKTOP
      case SYS_SOCKET:
      case SYS_CONNECT:
        if (BelowLevel(4)) {
          return Some(Allow());
        }
        return SandboxPolicyCommon::EvaluateSocketCall(aCall, aHasArgs);

      // FIXME (bug 1761134): sockopts should be filtered
      case SYS_GETSOCKOPT:
      case SYS_SETSOCKOPT:
      // These next 3 were needed for X11; they may not be needed
      // with X11 lockdown, but there's not much attack surface here.
      case SYS_GETSOCKNAME:
      case SYS_GETPEERNAME:
      case SYS_SHUTDOWN:
        return Some(Allow());

      case SYS_ACCEPT:
      case SYS_ACCEPT4:
        if (mUsingRenderDoc) {
          return Some(Allow());
        }
        [[fallthrough]];
#endif
      default:
        return SandboxPolicyCommon::EvaluateSocketCall(aCall, aHasArgs);
    }
  }

#ifdef DESKTOP
  // SysV IPC policy: allowed only when mAllowSysV is set, except that
  // shmget fails softly with EPERM instead of going to the base policy.
  Maybe<ResultExpr> EvaluateIpcCall(int aCall, int aArgShift) const override {
    switch (aCall) {
        // These are a problem: SysV IPC follows the Unix "same uid
        // policy" and can't be restricted/brokered like file access.
        // We're not using it directly, but there are some library
        // dependencies that do; see ContentNeedsSysVIPC() in
        // SandboxLaunch.cpp.  Also, Cairo as used by GTK will sometimes
        // try to use MIT-SHM, so shmget() is a non-fatal error.  See
        // also bug 1376910 and bug 1438401.
      case SHMGET:
        return Some(mAllowSysV ? Allow() : Error(EPERM));
      case SHMCTL:
      case SHMAT:
      case SHMDT:
      case SEMGET:
      case SEMCTL:
      case SEMOP:
        if (mAllowSysV) {
          return Some(Allow());
        }
        return SandboxPolicyCommon::EvaluateIpcCall(aCall, aArgShift);
      default:
        return SandboxPolicyCommon::EvaluateIpcCall(aCall, aArgShift);
    }
  }
#endif

#ifdef MOZ_PULSEAUDIO
  // Below level 4, additionally allows PR_GET_NAME on top of the
  // common prctl policy.
  ResultExpr PrctlPolicy() const override {
    if (BelowLevel(4)) {
      Arg<int> op(0);
      return If(op == PR_GET_NAME, Allow())
          .Else(SandboxPolicyCommon::PrctlPolicy());
    }
    return SandboxPolicyCommon::PrctlPolicy();
  }
#endif

  // Content-specific syscall policy.  Syscalls listed in
  // mParams.mSyscallWhitelist are allowed unconditionally; anything
  // not handled here is delegated to SandboxPolicyCommon.
  ResultExpr EvaluateSyscall(int sysno) const override {
    // Straight allow for anything that got overridden via prefs
    const auto& whitelist = mParams.mSyscallWhitelist;
    if (std::find(whitelist.begin(), whitelist.end(), sysno) !=
        whitelist.end()) {
      if (SandboxInfo::Get().Test(SandboxInfo::kVerbose)) {
        SANDBOX_LOG("Allowing syscall nr %d via whitelist", sysno);
      }
      return Allow();
    }

    // Level 1 has been removed.  If seccomp-bpf is used, then we're
    // necessarily at level >= 2 and filesystem access is brokered.
    MOZ_ASSERT(!BelowLevel(2));
    MOZ_ASSERT(mBroker);

    switch (sysno) {
#ifdef DESKTOP
      case __NR_getppid:
        return Trap(GetPPidTrap, nullptr);

#  ifdef MOZ_PULSEAUDIO
      CASES_FOR_fchown:
      case __NR_fchmod:
        return AllowBelowLevel(4);
#  endif
      CASES_FOR_fstatfs:  // fontconfig, pulseaudio, GIO (see also statfs)
      case __NR_flock:    // graphics
        return Allow();

        // Bug 1354731: proprietary GL drivers try to mknod() their devices
#  ifdef __NR_mknod
      case __NR_mknod:
#  endif
      case __NR_mknodat: {
        // The mode is the third argument of mknodat but the second of
        // mknod.
        Arg<mode_t> mode(sysno == __NR_mknodat ? 2 : 1);
        return If((mode & S_IFMT) == S_IFCHR, Error(EPERM))
            .Else(InvalidSyscall());
      }
        // Bug 1438389: ...and nvidia GL will sometimes try to chown the devices
#  ifdef __NR_chown
      case __NR_chown:
#  endif
      case __NR_fchownat:
        return Error(EPERM);
#endif

      CASES_FOR_select:
        return Allow();

      case __NR_writev:
#ifdef DESKTOP
      case __NR_pwrite64:
      case __NR_readahead:
#endif
        return Allow();

      case __NR_ioctl: {
#ifdef MOZ_ALSA
        if (BelowLevel(4)) {
          return Allow();
        }
#endif
        Arg<unsigned long> request(1);
        auto shifted_type = request & kIoctlTypeMask;

        // Rust's stdlib seems to use FIOCLEX instead of equivalent fcntls.
        return If(request == FIOCLEX, Allow())
            // Rust's stdlib also uses FIONBIO instead of equivalent fcntls.
            .ElseIf(request == FIONBIO, Allow())
            // Allow anything that isn't a tty ioctl, if level < 6
            .ElseIf(
                BelowLevel(6) ? shifted_type != kTtyIoctls : BoolConst(false),
                Allow())
            .Else(SandboxPolicyCommon::EvaluateSyscall(sysno));
      }

      CASES_FOR_fcntl: {
        Arg<int> cmd(1);
        return Switch(cmd)
            // Nvidia GL and fontconfig (newer versions) use fcntl file locking.
            .Case(F_SETLK, Allow())
#ifdef F_SETLK64
            .Case(F_SETLK64, Allow())
#endif
            // Pulseaudio uses F_SETLKW, as does fontconfig.
            .Case(F_SETLKW, Allow())
#ifdef F_SETLKW64
            .Case(F_SETLKW64, Allow())
#endif
            // Wayland client libraries use file seals
            .Case(F_ADD_SEALS, Allow())
            .Case(F_GET_SEALS, Allow())
            .Default(SandboxPolicyCommon::EvaluateSyscall(sysno));
      }

      case __NR_brk:
      // FIXME(bug 1510861) are we using any hints that aren't allowed
      // in SandboxPolicyCommon now?
      case __NR_madvise:
        return Allow();

        // wasm uses mremap (always with zero flags)
      case __NR_mremap: {
        Arg<int> flags(3);
        return If(flags == 0, Allow())
            .Else(SandboxPolicyCommon::EvaluateSyscall(sysno));
      }

        // Bug 1462640: Mesa libEGL uses mincore to test whether values
        // are pointers, for reasons.
      case __NR_mincore: {
        Arg<size_t> length(1);
        return If(length == getpagesize(), Allow())
            .Else(SandboxPolicyCommon::EvaluateSyscall(sysno));
      }

#ifdef __NR_set_thread_area
      case __NR_set_thread_area:
        return Allow();
#endif

      case __NR_getrusage:
      case __NR_times:
        return Allow();

      case __NR_fsync:
      case __NR_msync:
        return Allow();

      case __NR_getpriority:
      case __NR_setpriority:
      case __NR_sched_getattr:
      case __NR_sched_setattr:
      case __NR_sched_get_priority_min:
      case __NR_sched_get_priority_max:
      case __NR_sched_getscheduler:
      case __NR_sched_setscheduler:
      case __NR_sched_getparam:
      case __NR_sched_setparam:
#ifdef DESKTOP
      case __NR_sched_getaffinity:
#endif
        return Allow();

#ifdef DESKTOP
      case __NR_sched_setaffinity:
        return Error(EPERM);
#endif

#ifdef DESKTOP
      case __NR_pipe2: {
        // Restrict the flags; O_NOTIFICATION_PIPE in particular
        // exposes enough attack surface to be a cause for concern
        // (bug 1808320).  O_DIRECT isn't known to be used currently
        // (Try passes with it blocked), but should be low-risk, and
        // Chromium allows it.
        static constexpr int allowed_flags = O_CLOEXEC | O_NONBLOCK | O_DIRECT;
        Arg<int> flags(1);
        return If((flags & ~allowed_flags) == 0, Allow())
            .Else(InvalidSyscall());
      }

      CASES_FOR_getrlimit:
      CASES_FOR_getresuid:
      CASES_FOR_getresgid:
        return Allow();

      case __NR_prlimit64: {
        // Allow only the getrlimit() use case.  (glibc seems to use
        // only pid 0 to indicate the current process; pid == getpid()
        // is equivalent and could also be allowed if needed.)
        Arg<pid_t> pid(0);
        // This is really a const struct ::rlimit*, but Arg<> doesn't
        // work with pointers, only integer types.
        Arg<uintptr_t> new_limit(2);
        return If(AllOf(pid == 0, new_limit == 0), Allow())
            .Else(InvalidSyscall());
      }

        // PulseAudio calls umask, even though it's unsafe in
        // multithreaded applications.  But, allowing it here doesn't
        // really do anything one way or the other, now that file
        // accesses are brokered to another process.
      case __NR_umask:
        return AllowBelowLevel(4);

      case __NR_kill: {
        if (BelowLevel(4)) {
          Arg<int> sig(1);
          // PulseAudio uses kill(pid, 0) to check if purported owners of
          // shared memory files are still alive; see bug 1397753 for more
          // details.
          return If(sig == 0, Error(EPERM)).Else(InvalidSyscall());
        }
        return InvalidSyscall();
      }

      case __NR_wait4:
#  ifdef __NR_waitpid
      case __NR_waitpid:
#  endif
        // NSPR will start a thread to wait for child processes even if
        // fork() fails; see bug 227246 and bug 1299581.
        return Error(ECHILD);

      case __NR_eventfd2:
        return Allow();

#  ifdef __NR_rt_tgsigqueueinfo
        // Only allow sending signals within the process.
      case __NR_rt_tgsigqueueinfo: {
        Arg<pid_t> tgid(0);
        return If(tgid == getpid(), Allow()).Else(InvalidSyscall());
      }
#  endif

      case __NR_mlock:
      case __NR_munlock:
        return Allow();

        // We can't usefully allow fork+exec, even on a temporary basis;
        // the child would inherit the seccomp-bpf policy and almost
        // certainly die from an unexpected SIGSYS.  We also can't have
        // fork() crash, currently, because there are too many system
        // libraries/plugins that try to run commands.  But they can
        // usually do something reasonable on error.
      case __NR_clone:
        return ClonePolicy(Error(EPERM));
#  ifdef __NR_fork
      case __NR_fork:
        return Error(ENOSYS);
#  endif

#  ifdef __NR_fadvise64
      case __NR_fadvise64:
        return Allow();
#  endif

#  ifdef __NR_fadvise64_64
      case __NR_fadvise64_64:
        return Allow();
#  endif

      case __NR_fallocate:
        return Allow();

      case __NR_get_mempolicy:
        return Allow();

        // Required by libnuma for FFmpeg
      case __NR_set_mempolicy:
        return Error(ENOSYS);

      case __NR_kcmp:
        return KcmpPolicyForMesa();

#endif  // DESKTOP

#ifdef DESKTOP
      case __NR_sysinfo:
#endif
        return Allow();

      default:
        return SandboxPolicyCommon::EvaluateSyscall(sysno);
    }
  }
};

// Factory for the content-process policy; the returned pointer owns a
// ContentSandboxPolicy built from the broker and the moved-in params.
UniquePtr<sandbox::bpf_dsl::Policy> GetContentSandboxPolicy(
    SandboxBrokerClient* aMaybeBroker, ContentProcessSandboxParams&& aParams) {
  return MakeUnique<ContentSandboxPolicy>(aMaybeBroker, std::move(aParams));
}

// Unlike for content, the GeckoMediaPlugin seccomp-bpf policy needs
// to be an effective sandbox by itself, because we allow GMP on Linux
// systems where that's the only sandboxing mechanism we can use.
//
// Be especially careful about what this policy allows.
1822 class GMPSandboxPolicy : public SandboxPolicyCommon { 1823 static intptr_t OpenTrap(const arch_seccomp_data& aArgs, void* aux) { 1824 const auto files = static_cast<const SandboxOpenedFiles*>(aux); 1825 const char* path; 1826 int flags; 1827 1828 switch (aArgs.nr) { 1829 #ifdef __NR_open 1830 case __NR_open: 1831 path = reinterpret_cast<const char*>(aArgs.args[0]); 1832 flags = static_cast<int>(aArgs.args[1]); 1833 break; 1834 #endif 1835 case __NR_openat: 1836 // The path has to be absolute to match the pre-opened file (see 1837 // assertion in ctor) so the dirfd argument is ignored. 1838 path = reinterpret_cast<const char*>(aArgs.args[1]); 1839 flags = static_cast<int>(aArgs.args[2]); 1840 break; 1841 default: 1842 MOZ_CRASH("unexpected syscall number"); 1843 } 1844 1845 if ((flags & O_ACCMODE) != O_RDONLY) { 1846 SANDBOX_LOG("non-read-only open of file %s attempted (flags=0%o)", path, 1847 flags); 1848 return -EROFS; 1849 } 1850 int fd = files->GetDesc(path); 1851 if (fd < 0) { 1852 // SandboxOpenedFile::GetDesc already logged about this, if appropriate. 1853 return -ENOENT; 1854 } 1855 return fd; 1856 } 1857 1858 #if defined(__NR_stat64) || defined(__NR_stat) 1859 static intptr_t StatTrap(const arch_seccomp_data& aArgs, void* aux) { 1860 const auto* const files = static_cast<const SandboxOpenedFiles*>(aux); 1861 const auto* path = reinterpret_cast<const char*>(aArgs.args[0]); 1862 int fd = files->GetDesc(path); 1863 if (fd < 0) { 1864 // SandboxOpenedFile::GetDesc already logged about this, if appropriate. 
1865 return -ENOENT; 1866 } 1867 auto* buf = reinterpret_cast<statstruct*>(aArgs.args[1]); 1868 # ifdef __NR_fstat64 1869 return DoSyscall(__NR_fstat64, fd, buf); 1870 # else 1871 return DoSyscall(__NR_fstat, fd, buf); 1872 # endif 1873 } 1874 #endif 1875 1876 static intptr_t UnameTrap(const arch_seccomp_data& aArgs, void* aux) { 1877 const auto buf = reinterpret_cast<struct utsname*>(aArgs.args[0]); 1878 PodZero(buf); 1879 // The real uname() increases fingerprinting risk for no benefit. 1880 // This is close enough. 1881 strcpy(buf->sysname, "Linux"); 1882 strcpy(buf->version, "3"); 1883 return 0; 1884 } 1885 1886 static intptr_t FcntlTrap(const arch_seccomp_data& aArgs, void* aux) { 1887 const auto cmd = static_cast<int>(aArgs.args[1]); 1888 switch (cmd) { 1889 // This process can't exec, so the actual close-on-exec flag 1890 // doesn't matter; have it always read as true and ignore writes. 1891 case F_GETFD: 1892 return O_CLOEXEC; 1893 case F_SETFD: 1894 return 0; 1895 default: 1896 return -ENOSYS; 1897 } 1898 } 1899 1900 const SandboxOpenedFiles* mFiles; 1901 1902 public: 1903 explicit GMPSandboxPolicy(const SandboxOpenedFiles* aFiles) : mFiles(aFiles) { 1904 // Used by the profiler to send data back to the parent process; 1905 // we are not enabling the file broker, so this will only work if 1906 // memfd_create is available. 1907 mMayCreateShmem = true; 1908 } 1909 1910 ~GMPSandboxPolicy() override = default; 1911 1912 ResultExpr EvaluateSyscall(int sysno) const override { 1913 switch (sysno) { 1914 // Simulate opening the plugin file. 
1915 #ifdef __NR_open 1916 case __NR_open: 1917 #endif 1918 case __NR_openat: 1919 return Trap(OpenTrap, mFiles); 1920 1921 #if defined(__NR_stat64) || defined(__NR_stat) 1922 CASES_FOR_stat: 1923 return Trap(StatTrap, mFiles); 1924 #endif 1925 1926 case __NR_brk: 1927 return Allow(); 1928 case __NR_sched_get_priority_min: 1929 case __NR_sched_get_priority_max: 1930 return Allow(); 1931 case __NR_sched_getparam: 1932 case __NR_sched_getscheduler: 1933 case __NR_sched_setscheduler: { 1934 Arg<pid_t> pid(0); 1935 return If(pid == 0, Allow()).Else(Trap(SchedTrap, nullptr)); 1936 } 1937 1938 // For clock(3) on older glibcs; bug 1304220. 1939 case __NR_times: 1940 return Allow(); 1941 1942 // Bug 1372428 1943 case __NR_uname: 1944 return Trap(UnameTrap, nullptr); 1945 CASES_FOR_fcntl: 1946 return Trap(FcntlTrap, nullptr); 1947 1948 // Allow the same advice values as the default policy, but return 1949 // Error(ENOSYS) for other values. Because the Widevine CDM may probe 1950 // advice arguments, including invalid values, we don't want to return 1951 // InvalidSyscall(), as this will crash the process. So instead just 1952 // indicate such calls are not available. 1953 case __NR_madvise: { 1954 Arg<int> advice(2); 1955 return If(advice == MADV_DONTNEED, Allow()) 1956 .ElseIf(advice == MADV_FREE, Allow()) 1957 .ElseIf(advice == MADV_HUGEPAGE, Allow()) 1958 .ElseIf(advice == MADV_NOHUGEPAGE, Allow()) 1959 #ifdef MOZ_ASAN 1960 .ElseIf(advice == MADV_DONTDUMP, Allow()) 1961 #endif 1962 .ElseIf(advice == MADV_MERGEABLE, Error(EPERM)) // bug 1705045 1963 .Else(Error(ENOSYS)); 1964 } 1965 1966 // The profiler will try to readlink /proc/self/exe for native 1967 // stackwalking, but that's broken for several other reasons; 1968 // see discussion in bug 1770905. (That can be emulated by 1969 // pre-recording the result if/when we need it.) 
1970 #ifdef __NR_readlink 1971 case __NR_readlink: 1972 #endif 1973 case __NR_readlinkat: 1974 return Error(EINVAL); 1975 1976 default: 1977 return SandboxPolicyCommon::EvaluateSyscall(sysno); 1978 } 1979 } 1980 }; 1981 1982 UniquePtr<sandbox::bpf_dsl::Policy> GetMediaSandboxPolicy( 1983 const SandboxOpenedFiles* aFiles) { 1984 return UniquePtr<sandbox::bpf_dsl::Policy>(new GMPSandboxPolicy(aFiles)); 1985 } 1986 1987 // The policy for the data decoder process is similar to the one for 1988 // media plugins, but the codec code is all in-tree so it's better 1989 // behaved and doesn't need special exceptions (or the ability to load 1990 // a plugin file). However, it does directly create shared memory 1991 // segments, so it may need file brokering. 1992 class RDDSandboxPolicy final : public SandboxPolicyCommon { 1993 public: 1994 explicit RDDSandboxPolicy(SandboxBrokerClient* aBroker) { 1995 mBroker = aBroker; 1996 mMayCreateShmem = true; 1997 } 1998 1999 #ifndef ANDROID 2000 Maybe<ResultExpr> EvaluateIpcCall(int aCall, int aArgShift) const override { 2001 // The Intel media driver uses SysV IPC (semaphores and shared 2002 // memory) on newer hardware models; it always uses this fixed 2003 // key, so we can restrict semget and shmget. Unfortunately, the 2004 // calls that operate on these resources take "identifiers", which 2005 // are unpredictable (by us) but guessable (by an adversary). 
2006 static constexpr key_t kIntelKey = 'D' << 24 | 'V' << 8 | 'X' << 0; 2007 2008 switch (aCall) { 2009 case SEMGET: 2010 case SHMGET: { 2011 Arg<key_t> key(0 + aArgShift); 2012 return Some(If(key == kIntelKey, Allow()).Else(InvalidSyscall())); 2013 } 2014 2015 case SEMCTL: 2016 case SEMOP: 2017 case SEMTIMEDOP: 2018 case SHMCTL: 2019 case SHMAT: 2020 case SHMDT: 2021 return Some(Allow()); 2022 2023 default: 2024 return SandboxPolicyCommon::EvaluateIpcCall(aCall, aArgShift); 2025 } 2026 } 2027 #endif 2028 2029 Maybe<ResultExpr> EvaluateSocketCall(int aCall, 2030 bool aHasArgs) const override { 2031 switch (aCall) { 2032 // These are for X11. 2033 // 2034 // FIXME (bug 1884449): X11 is blocked now so we probably don't 2035 // need these, but they're relatively harmless. 2036 case SYS_GETSOCKNAME: 2037 case SYS_GETPEERNAME: 2038 case SYS_SHUTDOWN: 2039 return Some(Allow()); 2040 2041 case SYS_SOCKET: 2042 // Hardware-accelerated decode uses EGL to manage hardware surfaces. 2043 // When initialised it tries to connect to the Wayland server over a 2044 // UNIX socket. It still works fine if it can't connect to Wayland, so 2045 // don't let it create the socket (but don't kill the process for 2046 // trying). 2047 // 2048 // We also see attempts to connect to an X server on desktop 2049 // Linux sometimes (bug 1882598). 2050 return Some(Error(EACCES)); 2051 2052 default: 2053 return SandboxPolicyCommon::EvaluateSocketCall(aCall, aHasArgs); 2054 } 2055 } 2056 2057 ResultExpr EvaluateSyscall(int sysno) const override { 2058 switch (sysno) { 2059 case __NR_getrusage: 2060 return Allow(); 2061 2062 case __NR_ioctl: { 2063 Arg<unsigned long> request(1); 2064 auto shifted_type = request & kIoctlTypeMask; 2065 static constexpr unsigned long kDrmType = 2066 static_cast<unsigned long>('d') << _IOC_TYPESHIFT; 2067 // Note: 'b' is also the Binder device on Android. 
2068 static constexpr unsigned long kDmaBufType = 2069 static_cast<unsigned long>('b') << _IOC_TYPESHIFT; 2070 #ifdef MOZ_ENABLE_V4L2 2071 // Type 'V' for V4L2, used for hw accelerated decode 2072 static constexpr unsigned long kVideoType = 2073 static_cast<unsigned long>('V') << _IOC_TYPESHIFT; 2074 #endif 2075 // nvidia non-tegra uses some ioctls from this range (but not actual 2076 // fbdev ioctls; nvidia uses values >= 200 for the NR field 2077 // (low 8 bits)) 2078 static constexpr unsigned long kFbDevType = 2079 static_cast<unsigned long>('F') << _IOC_TYPESHIFT; 2080 2081 #if defined(__aarch64__) 2082 // NVIDIA decoder, from Linux4Tegra 2083 // http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2024-May/328552.html 2084 static constexpr unsigned long kNvidiaNvmapType = 2085 static_cast<unsigned long>('N') << _IOC_TYPESHIFT; 2086 static constexpr unsigned long kNvidiaNvhostType = 2087 static_cast<unsigned long>('H') << _IOC_TYPESHIFT; 2088 #endif // defined(__aarch64__) 2089 2090 // Allow DRI and DMA-Buf for VA-API. Also allow V4L2 if enabled 2091 return If(shifted_type == kDrmType, Allow()) 2092 .ElseIf(shifted_type == kDmaBufType, Allow()) 2093 #ifdef MOZ_ENABLE_V4L2 2094 .ElseIf(shifted_type == kVideoType, Allow()) 2095 #endif 2096 // NVIDIA decoder from Linux4Tegra, this is specific to Tegra ARM64 SoC 2097 #if defined(__aarch64__) 2098 .ElseIf(shifted_type == kNvidiaNvmapType, Allow()) 2099 .ElseIf(shifted_type == kNvidiaNvhostType, Allow()) 2100 #endif // defined(__aarch64__) 2101 // Hack for nvidia non-tegra devices, which isn't supported yet: 2102 .ElseIf(shifted_type == kFbDevType, Error(ENOTTY)) 2103 .Else(SandboxPolicyCommon::EvaluateSyscall(sysno)); 2104 } 2105 2106 // Mesa/amdgpu 2107 case __NR_kcmp: 2108 return KcmpPolicyForMesa(); 2109 2110 // We use this in our DMABuf support code. 2111 case __NR_eventfd2: 2112 return Allow(); 2113 2114 // Allow the sched_* syscalls for the current thread only. 
2115 // Mesa attempts to use them to optimize performance; often 2116 // this involves passing other threads' tids, which we can't 2117 // safely allow, but maybe a future Mesa version could fix that. 2118 case __NR_sched_getaffinity: 2119 case __NR_sched_setaffinity: 2120 case __NR_sched_getparam: 2121 case __NR_sched_setparam: 2122 case __NR_sched_getscheduler: 2123 case __NR_sched_setscheduler: 2124 case __NR_sched_getattr: 2125 case __NR_sched_setattr: { 2126 Arg<pid_t> pid(0); 2127 return If(pid == 0, Allow()).Else(Trap(SchedTrap, nullptr)); 2128 } 2129 2130 // The priority bounds are also used, sometimes (bug 1838675): 2131 case __NR_sched_get_priority_min: 2132 case __NR_sched_get_priority_max: 2133 return Allow(); 2134 2135 // nvidia tries to mknod(!) its devices; that won't work anyway, 2136 // so quietly reject it. 2137 #ifdef __NR_mknod 2138 case __NR_mknod: 2139 #endif 2140 case __NR_mknodat: 2141 return Error(EPERM); 2142 2143 // Used by the nvidia GPU driver, including in multi-GPU 2144 // systems when we intend to use a non-nvidia GPU. (Also used 2145 // by Mesa for its shader cache, but we disable that in this 2146 // process.) 2147 CASES_FOR_fstatfs: 2148 return Allow(); 2149 2150 // nvidia drivers may attempt to spawn nvidia-modprobe 2151 case __NR_clone: 2152 return ClonePolicy(Error(EPERM)); 2153 #ifdef __NR_fork 2154 case __NR_fork: 2155 return Error(ENOSYS); 2156 #endif 2157 2158 // Pass through the common policy. 2159 default: 2160 return SandboxPolicyCommon::EvaluateSyscall(sysno); 2161 } 2162 } 2163 }; 2164 2165 UniquePtr<sandbox::bpf_dsl::Policy> GetDecoderSandboxPolicy( 2166 SandboxBrokerClient* aMaybeBroker) { 2167 return UniquePtr<sandbox::bpf_dsl::Policy>( 2168 new RDDSandboxPolicy(aMaybeBroker)); 2169 } 2170 2171 // Basically a clone of RDDSandboxPolicy until we know exactly what 2172 // the SocketProcess sandbox looks like. 
2173 class SocketProcessSandboxPolicy final : public SandboxPolicyCommon { 2174 private: 2175 SocketProcessSandboxParams mParams; 2176 2177 bool BelowLevel(int aLevel) const { return mParams.mLevel < aLevel; } 2178 2179 public: 2180 explicit SocketProcessSandboxPolicy(SandboxBrokerClient* aBroker, 2181 SocketProcessSandboxParams&& aParams) 2182 : mParams(std::move(aParams)) { 2183 mBroker = aBroker; 2184 mMayCreateShmem = true; 2185 } 2186 2187 static intptr_t FcntlTrap(const arch_seccomp_data& aArgs, void* aux) { 2188 const auto cmd = static_cast<int>(aArgs.args[1]); 2189 switch (cmd) { 2190 // This process can't exec, so the actual close-on-exec flag 2191 // doesn't matter; have it always read as true and ignore writes. 2192 case F_GETFD: 2193 return O_CLOEXEC; 2194 case F_SETFD: 2195 return 0; 2196 default: 2197 return -ENOSYS; 2198 } 2199 } 2200 2201 BoolExpr MsgFlagsAllowed(const Arg<int>& aFlags) const override { 2202 // Necko might use advanced networking features, and the sandbox 2203 // is relatively permissive compared to content, so this is a 2204 // default-allow policy. 2205 // 2206 // However, `MSG_OOB` has historically been buggy, and the way it 2207 // maps to TCP is notoriously broken (see RFC 6093), so it should 2208 // be safe to block. 2209 return (aFlags & MSG_OOB) == 0; 2210 } 2211 2212 Maybe<ResultExpr> EvaluateSocketCall(int aCall, 2213 bool aHasArgs) const override { 2214 switch (aCall) { 2215 case SYS_SOCKET: 2216 case SYS_CONNECT: 2217 case SYS_BIND: 2218 return Some(Allow()); 2219 2220 // sendmsg and recvmmsg needed for HTTP3/QUIC UDP IO. Note sendmsg is 2221 // allowed in SandboxPolicyCommon. 2222 case SYS_RECVMMSG: 2223 // Required for the DNS Resolver thread. 
2224 case SYS_SENDMMSG: 2225 if (aHasArgs) { 2226 Arg<int> flags(3); 2227 return Some( 2228 If(MsgFlagsAllowed(flags), Allow()).Else(InvalidSyscall())); 2229 } 2230 return Some(UnpackSocketcallOrAllow()); 2231 2232 case SYS_GETSOCKOPT: 2233 case SYS_SETSOCKOPT: 2234 case SYS_GETSOCKNAME: 2235 case SYS_GETPEERNAME: 2236 case SYS_SHUTDOWN: 2237 case SYS_ACCEPT: 2238 case SYS_ACCEPT4: 2239 return Some(Allow()); 2240 2241 default: 2242 return SandboxPolicyCommon::EvaluateSocketCall(aCall, aHasArgs); 2243 } 2244 } 2245 2246 ResultExpr PrctlPolicy() const override { 2247 Arg<int> op(0); 2248 Arg<int> arg2(1); 2249 return Switch(op) 2250 .Case(PR_SET_VMA, // Tagging of anonymous memory mappings 2251 If(arg2 == PR_SET_VMA_ANON_NAME, Allow()).Else(InvalidSyscall())) 2252 .Cases({PR_SET_NAME, // Thread creation 2253 PR_SET_DUMPABLE, // Crash reporting 2254 PR_SET_PTRACER}, // Debug-mode crash handling 2255 Allow()) 2256 #if defined(MOZ_PROFILE_GENERATE) 2257 .Case(PR_GET_PDEATHSIG, Allow()) 2258 #endif // defined(MOZ_PROFILE_GENERATE) 2259 .Default(InvalidSyscall()); 2260 } 2261 2262 ResultExpr EvaluateSyscall(int sysno) const override { 2263 switch (sysno) { 2264 case __NR_getrusage: 2265 return Allow(); 2266 2267 case __NR_ioctl: { 2268 Arg<unsigned long> request(1); 2269 auto shifted_type = request & kIoctlTypeMask; 2270 2271 // Rust's stdlib seems to use FIOCLEX instead of equivalent fcntls. 2272 return Switch(request) 2273 .Case(FIOCLEX, Allow()) 2274 // Rust's stdlib also uses FIONBIO instead of equivalent fcntls. 2275 .Case(FIONBIO, Allow()) 2276 // This is used by PR_Available in nsSocketInputStream::Available. 2277 .Case(FIONREAD, Allow()) 2278 // WebRTC needs interface information (bug 1975576) 2279 .Cases({SIOCGIFNAME, SIOCGIFFLAGS, SIOCETHTOOL, SIOCGIWRATE}, 2280 Allow()) 2281 .Default( 2282 // Allow anything that isn't a tty ioctl (if level < 2) 2283 If(BelowLevel(2) ? 
shifted_type != kTtyIoctls 2284 : BoolConst(false), 2285 Allow()) 2286 .Else(SandboxPolicyCommon::EvaluateSyscall(sysno))); 2287 } 2288 2289 CASES_FOR_fcntl: { 2290 Arg<int> cmd(1); 2291 return Switch(cmd) 2292 .Case(F_DUPFD_CLOEXEC, Allow()) 2293 // Nvidia GL and fontconfig (newer versions) use fcntl file locking. 2294 .Case(F_SETLK, Allow()) 2295 #ifdef F_SETLK64 2296 .Case(F_SETLK64, Allow()) 2297 #endif 2298 // Pulseaudio uses F_SETLKW, as does fontconfig. 2299 .Case(F_SETLKW, Allow()) 2300 #ifdef F_SETLKW64 2301 .Case(F_SETLKW64, Allow()) 2302 #endif 2303 .Default(SandboxPolicyCommon::EvaluateSyscall(sysno)); 2304 } 2305 2306 #ifdef DESKTOP 2307 // This section is borrowed from ContentSandboxPolicy 2308 CASES_FOR_getrlimit: 2309 CASES_FOR_getresuid: 2310 CASES_FOR_getresgid: 2311 return Allow(); 2312 2313 case __NR_prlimit64: { 2314 // Allow only the getrlimit() use case. (glibc seems to use 2315 // only pid 0 to indicate the current process; pid == getpid() 2316 // is equivalent and could also be allowed if needed.) 2317 Arg<pid_t> pid(0); 2318 // This is really a const struct ::rlimit*, but Arg<> doesn't 2319 // work with pointers, only integer types. 
2320 Arg<uintptr_t> new_limit(2); 2321 return If(AllOf(pid == 0, new_limit == 0), Allow()) 2322 .Else(InvalidSyscall()); 2323 } 2324 #endif // DESKTOP 2325 2326 default: 2327 return SandboxPolicyCommon::EvaluateSyscall(sysno); 2328 } 2329 } 2330 }; 2331 2332 UniquePtr<sandbox::bpf_dsl::Policy> GetSocketProcessSandboxPolicy( 2333 SandboxBrokerClient* aMaybeBroker, SocketProcessSandboxParams&& aParams) { 2334 return UniquePtr<sandbox::bpf_dsl::Policy>( 2335 new SocketProcessSandboxPolicy(aMaybeBroker, std::move(aParams))); 2336 } 2337 2338 class UtilitySandboxPolicy : public SandboxPolicyCommon { 2339 public: 2340 explicit UtilitySandboxPolicy(SandboxBrokerClient* aBroker) { 2341 mBroker = aBroker; 2342 mMayCreateShmem = true; 2343 } 2344 2345 ResultExpr PrctlPolicy() const override { 2346 Arg<int> op(0); 2347 Arg<int> arg2(1); 2348 return Switch(op) 2349 .Case(PR_SET_VMA, // Tagging of anonymous memory mappings 2350 If(arg2 == PR_SET_VMA_ANON_NAME, Allow()).Else(InvalidSyscall())) 2351 .Cases({PR_SET_NAME, // Thread creation 2352 PR_SET_DUMPABLE, // Crash reporting 2353 PR_SET_PTRACER, // Debug-mode crash handling 2354 PR_GET_PDEATHSIG}, // PGO profiling, cf 2355 // https://reviews.llvm.org/D29954 2356 Allow()) 2357 .Case(PR_CAPBSET_READ, // libcap.so.2 loaded by libpulse.so.0 2358 // queries for capabilities 2359 Error(EINVAL)) 2360 #if defined(MOZ_PROFILE_GENERATE) 2361 .Case(PR_GET_PDEATHSIG, Allow()) 2362 #endif // defined(MOZ_PROFILE_GENERATE) 2363 .Default(InvalidSyscall()); 2364 } 2365 2366 ResultExpr EvaluateSyscall(int sysno) const override { 2367 switch (sysno) { 2368 case __NR_getrusage: 2369 return Allow(); 2370 2371 // Required by FFmpeg 2372 case __NR_get_mempolicy: 2373 return Allow(); 2374 2375 // Required by libnuma for FFmpeg 2376 case __NR_sched_getaffinity: { 2377 Arg<pid_t> pid(0); 2378 return If(pid == 0, Allow()).Else(Trap(SchedTrap, nullptr)); 2379 } 2380 2381 // Required by libnuma for FFmpeg 2382 case __NR_set_mempolicy: 2383 return 
Error(ENOSYS); 2384 2385 // Pass through the common policy. 2386 default: 2387 return SandboxPolicyCommon::EvaluateSyscall(sysno); 2388 } 2389 } 2390 }; 2391 2392 UniquePtr<sandbox::bpf_dsl::Policy> GetUtilitySandboxPolicy( 2393 SandboxBrokerClient* aMaybeBroker) { 2394 return UniquePtr<sandbox::bpf_dsl::Policy>( 2395 new UtilitySandboxPolicy(aMaybeBroker)); 2396 } 2397 2398 } // namespace mozilla