SharedMemoryPlatform_posix.cpp (15829B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 /* This source code was derived from Chromium code, and as such is also subject 8 * to the [Chromium license](ipc/chromium/src/LICENSE). */ 9 10 #include "SharedMemoryPlatform.h" 11 12 #include <errno.h> 13 #include <fcntl.h> 14 #include <sys/mman.h> 15 #include <sys/stat.h> 16 #include <unistd.h> 17 18 #ifdef XP_LINUX 19 # include "base/linux_memfd_defs.h" 20 #endif 21 #ifdef MOZ_WIDGET_GTK 22 # include "mozilla/WidgetUtilsGtk.h" 23 #endif 24 25 #ifdef __FreeBSD__ 26 # include <sys/capsicum.h> 27 #endif 28 29 #ifdef MOZ_VALGRIND 30 # include <valgrind/valgrind.h> 31 #endif 32 33 #include "base/eintr_wrapper.h" 34 #include "base/string_util.h" 35 #include "mozilla/Atomics.h" 36 #include "mozilla/Maybe.h" 37 #include "mozilla/ProfilerThreadSleep.h" 38 #include "mozilla/UniquePtrExtensions.h" 39 #include "prenv.h" 40 #include "nsXULAppAPI.h" // for XRE_IsParentProcess 41 42 namespace mozilla::ipc::shared_memory { 43 44 // memfd_create is a nonstandard interface for creating anonymous 45 // shared memory accessible as a file descriptor but not tied to any 46 // filesystem. It first appeared in Linux 3.17, and was adopted by 47 // FreeBSD in version 13. 48 49 #if !defined(HAVE_MEMFD_CREATE) && defined(XP_LINUX) && \ 50 defined(SYS_memfd_create) 51 52 // Older libc versions (e.g., glibc before 2.27) don't have the 53 // wrapper, but we can supply our own; see `linux_memfd_defs.h`. 54 55 static int memfd_create(const char* aName, unsigned int aFlags) { 56 return syscall(SYS_memfd_create, aName, aFlags); 57 } 58 59 # define HAVE_MEMFD_CREATE 1 60 #endif 61 62 // memfd supports having "seals" applied to the file, to prevent 63 // various types of changes (which apply to all fds referencing the 64 // file). Unfortunately, we can't rely on F_SEAL_WRITE to implement 65 // Freeze(); see the comments in ReadOnlyCopy() below. 66 // 67 // Instead, to prevent a child process from regaining write access to 68 // a read-only copy, the OS must also provide a way to remove write 69 // permissions at the file descriptor level. This next section 70 // attempts to accomplish that. 71 72 #ifdef HAVE_MEMFD_CREATE 73 # ifdef XP_LINUX 74 # define USE_MEMFD_CREATE 1 75 76 // To create a read-only duplicate of an fd, we can use procfs; the 77 // same operation could restore write access, but sandboxing prevents 78 // child processes from accessing /proc. 79 // 80 // (Note: if this ever changes to not use /proc, also reconsider how 81 // and if HaveMemfd should check whether this works.) 82 83 static int DupReadOnly(int aFd) { 84 MOZ_DIAGNOSTIC_ASSERT(XRE_IsParentProcess()); 85 std::string path = StringPrintf("/proc/self/fd/%d", aFd); 86 // procfs opens probably won't EINTR, but checking for it can't hurt 87 return HANDLE_EINTR(open(path.c_str(), O_RDONLY | O_CLOEXEC)); 88 } 89 90 # elif defined(__FreeBSD__) 91 # define USE_MEMFD_CREATE 1 92 93 // FreeBSD's Capsicum framework allows irrevocably restricting the 94 // operations permitted on a file descriptor. 95 96 static int DupReadOnly(int aFd) { 97 int rofd = dup(aFd); 98 if (rofd < 0) { 99 return -1; 100 } 101 102 cap_rights_t rights; 103 cap_rights_init(&rights, CAP_FSTAT, CAP_MMAP_R); 104 if (cap_rights_limit(rofd, &rights) < 0) { 105 int err = errno; 106 close(rofd); 107 errno = err; 108 return -1; 109 } 110 111 return rofd; 112 } 113 114 # else // unhandled OS 115 # warning "OS has memfd_create but no DupReadOnly implementation" 116 # endif // OS selection 117 #endif // HAVE_MEMFD_CREATE 118 119 // Runtime detection for memfd support. Returns `Nothing()` if not 120 // supported, or `Some(flags)` if supported, where `flags` contains 121 // flags like `MFD_CLOEXEC` that should be passed to all calls. 122 static Maybe<unsigned> HaveMemfd() { 123 #ifdef USE_MEMFD_CREATE 124 static const Maybe<unsigned> kHave = []() -> Maybe<unsigned> { 125 unsigned flags = MFD_CLOEXEC | MFD_ALLOW_SEALING; 126 # ifdef MFD_NOEXEC_SEAL 127 flags |= MFD_NOEXEC_SEAL; 128 # endif 129 130 mozilla::UniqueFileHandle fd(memfd_create("mozilla-ipc-test", flags)); 131 132 # ifdef MFD_NOEXEC_SEAL 133 if (!fd && errno == EINVAL) { 134 flags &= ~MFD_NOEXEC_SEAL; 135 fd.reset(memfd_create("mozilla-ipc-test", flags)); 136 } 137 # endif 138 139 if (!fd) { 140 MOZ_ASSERT(errno == ENOSYS); 141 return Nothing(); 142 } 143 144 // Verify that DupReadOnly works; on Linux it's known to fail if: 145 // 146 // * SELinux assigns the memfd a type for which this process's 147 // domain doesn't have "open" permission; this is always the 148 // case on Android but could occur on desktop as well 149 // 150 // * /proc (used by the DupReadOnly implementation) isn't mounted, 151 // which is a configuration that the Tor Browser project is 152 // interested in as a way to reduce fingerprinting risk 153 // 154 // Sandboxed processes on Linux also can't use it if sandboxing 155 // has already been started, but that's expected. It should be 156 // safe for sandboxed child processes to use memfd even if an 157 // unsandboxed process couldn't freeze them, because freezing 158 // isn't allowed (or meaningful) for memory created by another 159 // process. 160 161 if (XRE_IsParentProcess()) { 162 mozilla::UniqueFileHandle rofd(DupReadOnly(fd.get())); 163 if (!rofd) { 164 MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, 165 "read-only dup failed ({}); not using memfd", 166 strerror(errno)); 167 return Nothing(); 168 } 169 } 170 return Some(flags); 171 }(); 172 return kHave; 173 #else 174 return Nothing(); 175 #endif // USE_MEMFD_CREATE 176 } 177 178 bool AppendPosixShmPrefix(std::string* aStr, pid_t aPid) { 179 if (HaveMemfd()) { 180 return false; 181 } 182 *aStr += '/'; 183 #ifdef MOZ_WIDGET_GTK 184 // The Snap package environment doesn't provide a private /dev/shm 185 // (it's used for communication with services like PulseAudio); 186 // instead AppArmor is used to restrict access to it. Anything with 187 // this prefix is allowed: 188 if (const char* snap = mozilla::widget::GetSnapInstanceName()) { 189 StringAppendF(aStr, "snap.%s.", snap); 190 } 191 #endif // XP_LINUX 192 // Hopefully the "implementation defined" name length limit is long 193 // enough for this. 194 StringAppendF(aStr, "org.mozilla.ipc.%d.", static_cast<int>(aPid)); 195 return true; 196 } 197 198 // `freezable` is a pointer because `Maybe` nor `std::optional` can store a 199 // reference. If there is no freezable, pass as nullptr. 200 static Maybe<PlatformHandle> CreateImpl(size_t aSize, 201 PlatformHandle* aFreezable) { 202 MOZ_ASSERT(aSize > 0); 203 204 MOZ_DIAGNOSTIC_ASSERT( 205 !aFreezable || XRE_IsParentProcess(), 206 "Child processes may not create freezable shared memory"); 207 208 mozilla::UniqueFileHandle fd; 209 mozilla::UniqueFileHandle frozen_fd; 210 211 #ifdef USE_MEMFD_CREATE 212 if (auto flags = HaveMemfd()) { 213 fd.reset(memfd_create("mozilla-ipc", *flags)); 214 if (!fd) { 215 // In general it's too late to fall back here -- in a sandboxed 216 // child process, shm_open is already blocked. And it shouldn't 217 // be necessary. 218 MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, 219 "failed to create memfd: {}", strerror(errno)); 220 return Nothing(); 221 } 222 if (aFreezable) { 223 frozen_fd.reset(DupReadOnly(fd.get())); 224 if (!frozen_fd) { 225 MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, 226 "failed to create read-only memfd: {}", strerror(errno)); 227 return Nothing(); 228 } 229 } 230 } 231 #endif 232 233 if (!fd) { 234 // Generic Unix: shm_open + shm_unlink 235 do { 236 // The names don't need to be unique, but it saves time if they 237 // usually are. 238 static mozilla::Atomic<size_t> sNameCounter; 239 std::string name; 240 CHECK(AppendPosixShmPrefix(&name, getpid())); 241 StringAppendF(&name, "%zu", sNameCounter++); 242 // O_EXCL means the names being predictable shouldn't be a problem. 243 fd.reset(HANDLE_EINTR( 244 shm_open(name.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600))); 245 if (fd) { 246 if (aFreezable) { 247 frozen_fd.reset(HANDLE_EINTR(shm_open(name.c_str(), O_RDONLY, 0400))); 248 if (!frozen_fd) { 249 int open_err = errno; 250 shm_unlink(name.c_str()); 251 DLOG(FATAL) << "failed to re-open freezable shm: " 252 << strerror(open_err); 253 return Nothing(); 254 } 255 } 256 if (shm_unlink(name.c_str()) != 0) { 257 // This shouldn't happen, but if it does: assume the file is 258 // in fact leaked, and bail out now while it's still 0-length. 259 DLOG(FATAL) << "failed to unlink shm: " << strerror(errno); 260 return Nothing(); 261 } 262 } 263 } while (!fd && errno == EEXIST); 264 } 265 266 if (!fd) { 267 MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, "failed to open shm: {}", 268 strerror(errno)); 269 return Nothing(); 270 } 271 272 mozilla::Maybe<int> fallocateError; 273 #if defined(HAVE_POSIX_FALLOCATE) 274 // Using posix_fallocate will ensure that there's actually space for this 275 // file. Otherwise we end up with a sparse file that can give SIGBUS if we 276 // run out of space while writing to it. (This doesn't apply to memfd.) 277 if (!HaveMemfd()) { 278 int rv; 279 // Avoid repeated interruptions of posix_fallocate by the profiler's 280 // SIGPROF sampling signal. Indicating "thread sleep" here means we'll 281 // get up to one interruption but not more. See bug 1658847 for more. 282 // This has to be scoped outside the HANDLE_RV_EINTR retry loop. 283 { 284 AUTO_PROFILER_THREAD_SLEEP; 285 286 rv = HANDLE_RV_EINTR( 287 posix_fallocate(fd.get(), 0, static_cast<off_t>(aSize))); 288 } 289 290 // Some filesystems have trouble with posix_fallocate. For now, we must 291 // fallback ftruncate and accept the allocation failures like we do 292 // without posix_fallocate. 293 // See https://bugzilla.mozilla.org/show_bug.cgi?id=1618914 294 if (rv != 0 && rv != EOPNOTSUPP && rv != EINVAL && rv != ENODEV) { 295 MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, 296 "fallocate failed to set shm size: {}", strerror(rv)); 297 return Nothing(); 298 } 299 fallocateError = mozilla::Some(rv); 300 } 301 #endif 302 303 // If posix_fallocate isn't supported / relevant for this type of 304 // file (either failed with an expected error, or wasn't attempted), 305 // then set the size with ftruncate: 306 if (fallocateError != mozilla::Some(0)) { 307 int rv = HANDLE_EINTR(ftruncate(fd.get(), static_cast<off_t>(aSize))); 308 if (rv != 0) { 309 int ftruncate_errno = errno; 310 if (fallocateError) { 311 MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, 312 "fallocate failed to set shm size: {}", 313 strerror(*fallocateError)); 314 } 315 MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, 316 "fallocate failed to set shm size: {}", 317 strerror(ftruncate_errno)); 318 return Nothing(); 319 } 320 } 321 322 if (aFreezable) { 323 *aFreezable = std::move(frozen_fd); 324 } 325 return Some(std::move(fd)); 326 } 327 328 bool UsingPosixShm() { return !HaveMemfd(); } 329 330 bool Platform::Create(MutableHandle& aHandle, size_t aSize) { 331 if (auto ph = CreateImpl(aSize, nullptr)) { 332 aHandle.mHandle = std::move(*ph); 333 aHandle.SetSize(aSize); 334 return true; 335 } 336 return false; 337 } 338 339 bool Platform::CreateFreezable(FreezableHandle& aHandle, size_t aSize) { 340 if (auto ph = CreateImpl(aSize, &aHandle.mFrozenFile)) { 341 aHandle.mHandle = std::move(*ph); 342 aHandle.SetSize(aSize); 343 return true; 344 } 345 return false; 346 } 347 348 PlatformHandle Platform::CloneHandle(const PlatformHandle& aHandle) { 349 const int new_fd = dup(aHandle.get()); 350 if (new_fd < 0) { 351 MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, 352 "failed to duplicate file descriptor: {}", strerror(errno)); 353 return nullptr; 354 } 355 return mozilla::UniqueFileHandle(new_fd); 356 } 357 358 bool Platform::Freeze(FreezableHandle& aHandle) { 359 #ifdef USE_MEMFD_CREATE 360 # ifdef MOZ_VALGRIND 361 // Valgrind allows memfd_create but doesn't understand F_ADD_SEALS. 362 static const bool haveSeals = RUNNING_ON_VALGRIND == 0; 363 # else 364 static const bool haveSeals = true; 365 # endif 366 static const bool useSeals = !PR_GetEnv("MOZ_SHM_NO_SEALS"); 367 if (HaveMemfd() && haveSeals && useSeals) { 368 // Seals are added to the file as defense-in-depth. The primary 369 // method of access control is creating a read-only fd (using 370 // procfs in this case) and requiring that sandboxes processes not 371 // have access to /proc/self/fd to regain write permission; this 372 // is the same as with shm_open. 373 // 374 // Unfortunately, F_SEAL_WRITE is unreliable: if the process 375 // forked while there was a writeable mapping, it will inherit a 376 // copy of the mapping, which causes the seal to fail. 377 // 378 // (Also, in the future we may want to split this into separate 379 // classes for mappings and shared memory handles, which would 380 // complicate identifying the case where `F_SEAL_WRITE` would be 381 // possible even in the absence of races with fork.) 382 // 383 // However, Linux 5.1 added F_SEAL_FUTURE_WRITE, which prevents 384 // write operations afterwards, but existing writeable mappings 385 // are unaffected (similar to ashmem protection semantics). 386 387 const int seals = F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL; 388 int sealError = EINVAL; 389 390 # ifdef F_SEAL_FUTURE_WRITE 391 sealError = fcntl(aHandle.mHandle.get(), F_ADD_SEALS, 392 seals | F_SEAL_FUTURE_WRITE) == 0 393 ? 0 394 : errno; 395 # endif // F_SEAL_FUTURE_WRITE 396 if (sealError == EINVAL) { 397 sealError = 398 fcntl(aHandle.mHandle.get(), F_ADD_SEALS, seals) == 0 ? 0 : errno; 399 } 400 if (sealError != 0) { 401 MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, 402 "failed to seal memfd: {}", strerror(errno)); 403 return false; 404 } 405 } 406 #else // !USE_MEMFD_CREATE 407 MOZ_ASSERT(!HaveMemfd()); 408 #endif 409 410 MOZ_ASSERT(aHandle.mFrozenFile); 411 MOZ_ASSERT(aHandle.mHandle); 412 aHandle.mHandle = std::move(aHandle.mFrozenFile); 413 MOZ_ASSERT(aHandle.mHandle); 414 415 return true; 416 } 417 418 Maybe<void*> Platform::Map(const HandleBase& aHandle, uint64_t aOffset, 419 size_t aSize, void* aFixedAddress, bool aReadOnly) { 420 // Don't use MAP_FIXED when a fixed_address was specified, since that can 421 // replace pages that are alread mapped at that address. 422 void* mem = 423 mmap(aFixedAddress, aSize, PROT_READ | (aReadOnly ? 0 : PROT_WRITE), 424 MAP_SHARED, aHandle.mHandle.get(), aOffset); 425 426 if (mem == MAP_FAILED) { 427 MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, "call to mmap failed: {}", 428 strerror(errno)); 429 return Nothing(); 430 } 431 432 if (aFixedAddress && mem != aFixedAddress) { 433 DebugOnly<bool> munmap_succeeded = munmap(mem, aSize) == 0; 434 MOZ_ASSERT(munmap_succeeded, "call to munmap failed"); 435 return Nothing(); 436 } 437 438 return Some(mem); 439 } 440 441 void Platform::Unmap(void* aMemory, size_t aSize) { munmap(aMemory, aSize); } 442 443 bool Platform::Protect(char* aAddr, size_t aSize, Access aAccess) { 444 int flags = PROT_NONE; 445 if (aAccess & AccessRead) flags |= PROT_READ; 446 if (aAccess & AccessWrite) flags |= PROT_WRITE; 447 448 return 0 == mprotect(aAddr, aSize, flags); 449 } 450 451 void* Platform::FindFreeAddressSpace(size_t aSize) { 452 #ifndef __FreeBSD__ 453 constexpr int flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE; 454 #else 455 constexpr int flags = MAP_ANONYMOUS | MAP_PRIVATE; 456 #endif 457 void* memory = mmap(nullptr, aSize, PROT_NONE, flags, -1, 0); 458 if (memory == MAP_FAILED) { 459 return nullptr; 460 } 461 munmap(memory, aSize); 462 return memory; 463 } 464 465 size_t Platform::PageSize() { return sysconf(_SC_PAGESIZE); } 466 467 size_t Platform::AllocationGranularity() { return PageSize(); } 468 469 bool Platform::IsSafeToMap(const PlatformHandle&) { return true; } 470 471 } // namespace mozilla::ipc::shared_memory