GenerateAtomicOperations.py

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# This script generates jit/AtomicOperationsGenerated.h
#
# See the big comment in jit/AtomicOperations.h for an explanation.

import buildconfig

is_64bit = "JS_64BIT" in buildconfig.defines
cpu_arch = buildconfig.substs["TARGET_CPU"]
is_gcc = buildconfig.substs["CC_TYPE"] == "gcc"


def fmt_insn(s):
    return '"' + s + '\\n\\t"\n'


def gen_seqcst(fun_name):
    if cpu_arch in ("x86", "x86_64"):
        return r"""
INLINE_ATTR void %(fun_name)s() {
    asm volatile ("mfence\n\t" ::: "memory");
}""" % {
            "fun_name": fun_name,
        }
    if cpu_arch == "aarch64":
        return r"""
INLINE_ATTR void %(fun_name)s() {
    asm volatile ("dmb ish\n\t" ::: "memory");
}""" % {
            "fun_name": fun_name,
        }
    if cpu_arch == "arm":
        return r"""
INLINE_ATTR void %(fun_name)s() {
    asm volatile ("dmb sy\n\t" ::: "memory");
}""" % {
            "fun_name": fun_name,
        }
    raise Exception("Unexpected arch")


def gen_load(fun_name, cpp_type, size, barrier):
    # NOTE: the assembly code must match the generated code in:
    # - CacheIRCompiler::emitAtomicsLoadResult
    # - LIRGenerator::visitLoadUnboxedScalar
    # - CodeGenerator::visitAtomicLoad64 (on 64-bit platforms)
    # - MacroAssembler::wasmLoad
    if cpu_arch in ("x86", "x86_64"):
        insns = ""
        if size == 8:
            insns += fmt_insn("movb (%[arg]), %[res]")
        elif size == 16:
            insns += fmt_insn("movw (%[arg]), %[res]")
        elif size == 32:
            insns += fmt_insn("movl (%[arg]), %[res]")
        else:
            assert size == 64
            insns += fmt_insn("movq (%[arg]), %[res]")
        return """
INLINE_ATTR %(cpp_type)s %(fun_name)s(const %(cpp_type)s* arg) {
    %(cpp_type)s res;
    asm volatile (%(insns)s
                  : [res] "=r" (res)
                  : [arg] "r" (arg)
                  : "memory");
    return res;
}""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "aarch64":
        insns = ""
        if size == 8:
            insns += fmt_insn("ldrb %w[res], [%x[arg]]")
        elif size == 16:
            insns += fmt_insn("ldrh %w[res], [%x[arg]]")
        elif size == 32:
            insns += fmt_insn("ldr %w[res], [%x[arg]]")
        else:
            assert size == 64
            insns += fmt_insn("ldr %x[res], [%x[arg]]")
        if barrier:
            insns += fmt_insn("dmb ish")
        return """
INLINE_ATTR %(cpp_type)s %(fun_name)s(const %(cpp_type)s* arg) {
    %(cpp_type)s res;
    asm volatile (%(insns)s
                  : [res] "=r" (res)
                  : [arg] "r" (arg)
                  : "memory");
    return res;
}""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "arm":
        insns = ""
        if size == 8:
            insns += fmt_insn("ldrb %[res], [%[arg]]")
        elif size == 16:
            insns += fmt_insn("ldrh %[res], [%[arg]]")
        else:
            assert size == 32
            insns += fmt_insn("ldr %[res], [%[arg]]")
        if barrier:
            insns += fmt_insn("dmb sy")
        return """
INLINE_ATTR %(cpp_type)s %(fun_name)s(const %(cpp_type)s* arg) {
    %(cpp_type)s res;
    asm volatile (%(insns)s
                  : [res] "=r" (res)
                  : [arg] "r" (arg)
                  : "memory");
    return res;
}""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    raise Exception("Unexpected arch")
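

# Illustration (added comment, not part of the original script): on x86_64,
# gen_load("AtomicLoad32SeqCst", "uint32_t", 32, True) above yields roughly the
# following C++, once generate_atomics_header() below has replaced INLINE_ATTR
# with `inline`:
#
#     inline uint32_t AtomicLoad32SeqCst(const uint32_t* arg) {
#         uint32_t res;
#         asm volatile ("movl (%[arg]), %[res]\n\t"
#                       : [res] "=r" (res)
#                       : [arg] "r" (arg)
#                       : "memory");
#         return res;
#     }
#
# No fence is emitted on x86/x86_64 because under TSO plain loads are already
# sequentially consistent provided SeqCst stores are fenced (see gen_store);
# on aarch64 and arm the `barrier` argument appends a trailing "dmb" instead.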


def gen_store(fun_name, cpp_type, size, barrier):
    # NOTE: the assembly code must match the generated code in:
    # - CacheIRCompiler::emitAtomicsStoreResult
    # - LIRGenerator::visitStoreUnboxedScalar
    # - CodeGenerator::visitAtomicStore64 (on 64-bit platforms)
    # - MacroAssembler::wasmStore
    if cpu_arch in ("x86", "x86_64"):
        insns = ""
        if size == 8:
            insns += fmt_insn("movb %[val], (%[addr])")
        elif size == 16:
            insns += fmt_insn("movw %[val], (%[addr])")
        elif size == 32:
            insns += fmt_insn("movl %[val], (%[addr])")
        else:
            assert size == 64
            insns += fmt_insn("movq %[val], (%[addr])")
        if barrier:
            insns += fmt_insn("mfence")
        return """
INLINE_ATTR void %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
    asm volatile (%(insns)s
                  :
                  : [addr] "r" (addr), [val] "r"(val)
                  : "memory");
}""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "aarch64":
        insns = ""
        if barrier:
            insns += fmt_insn("dmb ish")
        if size == 8:
            insns += fmt_insn("strb %w[val], [%x[addr]]")
        elif size == 16:
            insns += fmt_insn("strh %w[val], [%x[addr]]")
        elif size == 32:
            insns += fmt_insn("str %w[val], [%x[addr]]")
        else:
            assert size == 64
            insns += fmt_insn("str %x[val], [%x[addr]]")
        if barrier:
            insns += fmt_insn("dmb ish")
        return """
INLINE_ATTR void %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
    asm volatile (%(insns)s
                  :
                  : [addr] "r" (addr), [val] "r"(val)
                  : "memory");
}""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "arm":
        insns = ""
        if barrier:
            insns += fmt_insn("dmb sy")
        if size == 8:
            insns += fmt_insn("strb %[val], [%[addr]]")
        elif size == 16:
            insns += fmt_insn("strh %[val], [%[addr]]")
        else:
            assert size == 32
            insns += fmt_insn("str %[val], [%[addr]]")
        if barrier:
            insns += fmt_insn("dmb sy")
        return """
INLINE_ATTR void %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
    asm volatile (%(insns)s
                  :
                  : [addr] "r" (addr), [val] "r"(val)
                  : "memory");
}""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    raise Exception("Unexpected arch")


def gen_exchange(fun_name, cpp_type, size):
    # NOTE: the assembly code must match the generated code in:
    # - MacroAssembler::atomicExchange
    # - MacroAssembler::atomicExchange64 (on 64-bit platforms)
    if cpu_arch in ("x86", "x86_64"):
        # Request an input/output register for `val` so that we can simply XCHG it
        # with *addr.
        insns = ""
        if size == 8:
            insns += fmt_insn("xchgb %[val], (%[addr])")
        elif size == 16:
            insns += fmt_insn("xchgw %[val], (%[addr])")
        elif size == 32:
            insns += fmt_insn("xchgl %[val], (%[addr])")
        else:
            assert size == 64
            insns += fmt_insn("xchgq %[val], (%[addr])")
        return """
INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
    asm volatile (%(insns)s
                  : [val] "+r" (val)
                  : [addr] "r" (addr)
                  : "memory");
    return val;
}""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "aarch64":
        insns = ""
        insns += fmt_insn("dmb ish")
        insns += fmt_insn("0:")
        if size == 8:
            insns += fmt_insn("ldxrb %w[res], [%x[addr]]")
            insns += fmt_insn("stxrb %w[scratch], %w[val], [%x[addr]]")
        elif size == 16:
            insns += fmt_insn("ldxrh %w[res], [%x[addr]]")
            insns += fmt_insn("stxrh %w[scratch], %w[val], [%x[addr]]")
        elif size == 32:
            insns += fmt_insn("ldxr %w[res], [%x[addr]]")
            insns += fmt_insn("stxr %w[scratch], %w[val], [%x[addr]]")
        else:
            assert size == 64
            insns += fmt_insn("ldxr %x[res], [%x[addr]]")
            insns += fmt_insn("stxr %w[scratch], %x[val], [%x[addr]]")
        insns += fmt_insn("cbnz %w[scratch], 0b")
        insns += fmt_insn("dmb ish")
        return """
INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
    %(cpp_type)s res;
    uint32_t scratch;
    asm volatile (%(insns)s
                  : [res] "=&r"(res), [scratch] "=&r"(scratch)
                  : [addr] "r" (addr), [val] "r"(val)
                  : "memory", "cc");
    return res;
}""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "arm":
        insns = ""
        insns += fmt_insn("dmb sy")
        insns += fmt_insn("0:")
        if size == 8:
            insns += fmt_insn("ldrexb %[res], [%[addr]]")
            insns += fmt_insn("strexb %[scratch], %[val], [%[addr]]")
        elif size == 16:
            insns += fmt_insn("ldrexh %[res], [%[addr]]")
            insns += fmt_insn("strexh %[scratch], %[val], [%[addr]]")
        else:
            assert size == 32
            insns += fmt_insn("ldrex %[res], [%[addr]]")
            insns += fmt_insn("strex %[scratch], %[val], [%[addr]]")
        insns += fmt_insn("cmp %[scratch], #1")
        insns += fmt_insn("beq 0b")
        insns += fmt_insn("dmb sy")
        return """
INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
    %(cpp_type)s res;
    uint32_t scratch;
    asm volatile (%(insns)s
                  : [res] "=&r"(res), [scratch] "=&r"(scratch)
                  : [addr] "r" (addr), [val] "r"(val)
                  : "memory", "cc");
    return res;
}""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    raise Exception("Unexpected arch")
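

# Illustration (added comment, not part of the original script): on x86_64,
# gen_exchange("AtomicExchange32SeqCst", "uint32_t", 32) above expands to
# roughly:
#
#     inline uint32_t AtomicExchange32SeqCst(uint32_t* addr, uint32_t val) {
#         asm volatile ("xchgl %[val], (%[addr])\n\t"
#                       : [val] "+r" (val)
#                       : [addr] "r" (addr)
#                       : "memory");
#         return val;
#     }
#
# XCHG with a memory operand implies a LOCK prefix, so no explicit "lock;" or
# fence is needed; the arm/aarch64 paths instead use a load-exclusive/
# store-exclusive retry loop bracketed by "dmb" barriers.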


def gen_cmpxchg(fun_name, cpp_type, size):
    # NOTE: the assembly code must match the generated code in:
    # - MacroAssembler::compareExchange
    # - MacroAssembler::compareExchange64
    if cpu_arch == "x86" and size == 64:
        # Use a +A constraint to load `oldval` into EDX:EAX as input/output.
        # `newval` is loaded into ECX:EBX.
        return r"""
INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr,
                                      %(cpp_type)s oldval,
                                      %(cpp_type)s newval) {
    asm volatile ("lock; cmpxchg8b (%%[addr])\n\t"
                  : "+A" (oldval)
                  : [addr] "r" (addr),
                    "b" (uint32_t(newval & 0xffff'ffff)),
                    "c" (uint32_t(newval >> 32))
                  : "memory", "cc");
    return oldval;
}""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
        }
    if cpu_arch == "arm" and size == 64:
        return r"""
INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr,
                                      %(cpp_type)s oldval,
                                      %(cpp_type)s newval) {
    uint32_t oldval0 = oldval & 0xffff'ffff;
    uint32_t oldval1 = oldval >> 32;
    uint32_t newval0 = newval & 0xffff'ffff;
    uint32_t newval1 = newval >> 32;
    asm volatile (
        "dmb sy\n\t"
        "0: ldrexd r0, r1, [%%[addr]]\n\t"
        "cmp r0, %%[oldval0]\n\t"
        "bne 1f\n\t"
        "cmp r1, %%[oldval1]\n\t"
        "bne 1f\n\t"
        "mov r2, %%[newval0]\n\t"
        "mov r3, %%[newval1]\n\t"
        "strexd r4, r2, r3, [%%[addr]]\n\t"
        "cmp r4, #1\n\t"
        "beq 0b\n\t"
        "1: dmb sy\n\t"
        "mov %%[oldval0], r0\n\t"
        "mov %%[oldval1], r1\n\t"
        : [oldval0] "+&r" (oldval0), [oldval1] "+&r"(oldval1)
        : [addr] "r" (addr), [newval0] "r" (newval0), [newval1] "r" (newval1)
        : "memory", "cc", "r0", "r1", "r2", "r3", "r4");
    return uint64_t(oldval0) | (uint64_t(oldval1) << 32);
}""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
        }
    if cpu_arch in ("x86", "x86_64"):
        # Use a +a constraint to load `oldval` into RAX as input/output register.
        insns = ""
        if size == 8:
            insns += fmt_insn("lock; cmpxchgb %[newval], (%[addr])")
        elif size == 16:
            insns += fmt_insn("lock; cmpxchgw %[newval], (%[addr])")
        elif size == 32:
            insns += fmt_insn("lock; cmpxchgl %[newval], (%[addr])")
        else:
            assert size == 64
            insns += fmt_insn("lock; cmpxchgq %[newval], (%[addr])")
        return """
INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr,
                                      %(cpp_type)s oldval,
                                      %(cpp_type)s newval) {
    asm volatile (%(insns)s
                  : [oldval] "+a" (oldval)
                  : [addr] "r" (addr), [newval] "r" (newval)
                  : "memory", "cc");
    return oldval;
}""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "aarch64":
        insns = ""
        insns += fmt_insn("dmb ish")
        insns += fmt_insn("0:")
        if size == 8:
            insns += fmt_insn("uxtb %w[scratch], %w[oldval]")
            insns += fmt_insn("ldxrb %w[res], [%x[addr]]")
            insns += fmt_insn("cmp %w[res], %w[scratch]")
            insns += fmt_insn("b.ne 1f")
            insns += fmt_insn("stxrb %w[scratch], %w[newval], [%x[addr]]")
        elif size == 16:
            insns += fmt_insn("uxth %w[scratch], %w[oldval]")
            insns += fmt_insn("ldxrh %w[res], [%x[addr]]")
            insns += fmt_insn("cmp %w[res], %w[scratch]")
            insns += fmt_insn("b.ne 1f")
            insns += fmt_insn("stxrh %w[scratch], %w[newval], [%x[addr]]")
        elif size == 32:
            insns += fmt_insn("mov %w[scratch], %w[oldval]")
            insns += fmt_insn("ldxr %w[res], [%x[addr]]")
            insns += fmt_insn("cmp %w[res], %w[scratch]")
            insns += fmt_insn("b.ne 1f")
            insns += fmt_insn("stxr %w[scratch], %w[newval], [%x[addr]]")
        else:
            assert size == 64
            insns += fmt_insn("mov %x[scratch], %x[oldval]")
            insns += fmt_insn("ldxr %x[res], [%x[addr]]")
            insns += fmt_insn("cmp %x[res], %x[scratch]")
            insns += fmt_insn("b.ne 1f")
            insns += fmt_insn("stxr %w[scratch], %x[newval], [%x[addr]]")
        insns += fmt_insn("cbnz %w[scratch], 0b")
        insns += fmt_insn("1: dmb ish")
fmt_insn("1: dmb ish") 418 return """ 419 INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, 420 %(cpp_type)s oldval, 421 %(cpp_type)s newval) { 422 %(cpp_type)s res, scratch; 423 asm volatile (%(insns)s 424 : [res] "=&r" (res), [scratch] "=&r" (scratch) 425 : [addr] "r" (addr), [oldval] "r"(oldval), [newval] "r" (newval) 426 : "memory", "cc"); 427 return res; 428 }""" % { 429 "cpp_type": cpp_type, 430 "fun_name": fun_name, 431 "insns": insns, 432 } 433 if cpu_arch == "arm": 434 insns = "" 435 insns += fmt_insn("dmb sy") 436 insns += fmt_insn("0:") 437 if size == 8: 438 insns += fmt_insn("uxtb %[scratch], %[oldval]") 439 insns += fmt_insn("ldrexb %[res], [%[addr]]") 440 insns += fmt_insn("cmp %[res], %[scratch]") 441 insns += fmt_insn("bne 1f") 442 insns += fmt_insn("strexb %[scratch], %[newval], [%[addr]]") 443 elif size == 16: 444 insns += fmt_insn("uxth %[scratch], %[oldval]") 445 insns += fmt_insn("ldrexh %[res], [%[addr]]") 446 insns += fmt_insn("cmp %[res], %[scratch]") 447 insns += fmt_insn("bne 1f") 448 insns += fmt_insn("strexh %[scratch], %[newval], [%[addr]]") 449 else: 450 assert size == 32 451 insns += fmt_insn("mov %[scratch], %[oldval]") 452 insns += fmt_insn("ldrex %[res], [%[addr]]") 453 insns += fmt_insn("cmp %[res], %[scratch]") 454 insns += fmt_insn("bne 1f") 455 insns += fmt_insn("strex %[scratch], %[newval], [%[addr]]") 456 insns += fmt_insn("cmp %[scratch], #1") 457 insns += fmt_insn("beq 0b") 458 insns += fmt_insn("1: dmb sy") 459 return """ 460 INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, 461 %(cpp_type)s oldval, 462 %(cpp_type)s newval) { 463 %(cpp_type)s res, scratch; 464 asm volatile (%(insns)s 465 : [res] "=&r" (res), [scratch] "=&r" (scratch) 466 : [addr] "r" (addr), [oldval] "r"(oldval), [newval] "r" (newval) 467 : "memory", "cc"); 468 return res; 469 }""" % { 470 "cpp_type": cpp_type, 471 "fun_name": fun_name, 472 "insns": insns, 473 } 474 raise Exception("Unexpected arch") 475 476 477 def gen_fetchop(fun_name, cpp_type, size, op): 478 # NOTE: the assembly code must match the generated code in: 479 # - MacroAssembler::atomicFetchOp 480 # - MacroAssembler::atomicFetchOp64 (on 64-bit platforms) 481 if cpu_arch in ("x86", "x86_64"): 482 # The `add` operation can be optimized with XADD. 483 if op == "add": 484 insns = "" 485 if size == 8: 486 insns += fmt_insn("lock; xaddb %[val], (%[addr])") 487 elif size == 16: 488 insns += fmt_insn("lock; xaddw %[val], (%[addr])") 489 elif size == 32: 490 insns += fmt_insn("lock; xaddl %[val], (%[addr])") 491 else: 492 assert size == 64 493 insns += fmt_insn("lock; xaddq %[val], (%[addr])") 494 return """ 495 INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { 496 asm volatile (%(insns)s 497 : [val] "+&r" (val) 498 : [addr] "r" (addr) 499 : "memory", "cc"); 500 return val; 501 }""" % { 502 "cpp_type": cpp_type, 503 "fun_name": fun_name, 504 "insns": insns, 505 } 506 # Use a +a constraint to ensure `res` is stored in RAX. This is required 507 # for the CMPXCHG instruction. 


def gen_fetchop(fun_name, cpp_type, size, op):
    # NOTE: the assembly code must match the generated code in:
    # - MacroAssembler::atomicFetchOp
    # - MacroAssembler::atomicFetchOp64 (on 64-bit platforms)
    if cpu_arch in ("x86", "x86_64"):
        # The `add` operation can be optimized with XADD.
        if op == "add":
            insns = ""
            if size == 8:
                insns += fmt_insn("lock; xaddb %[val], (%[addr])")
            elif size == 16:
                insns += fmt_insn("lock; xaddw %[val], (%[addr])")
            elif size == 32:
                insns += fmt_insn("lock; xaddl %[val], (%[addr])")
            else:
                assert size == 64
                insns += fmt_insn("lock; xaddq %[val], (%[addr])")
            return """
INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
    asm volatile (%(insns)s
                  : [val] "+&r" (val)
                  : [addr] "r" (addr)
                  : "memory", "cc");
    return val;
}""" % {
                "cpp_type": cpp_type,
                "fun_name": fun_name,
                "insns": insns,
            }
        # Use a +a constraint to ensure `res` is stored in RAX. This is required
        # for the CMPXCHG instruction.
        insns = ""
        if size == 8:
            insns += fmt_insn("movb (%[addr]), %[res]")
            insns += fmt_insn("0: movb %[res], %[scratch]")
            insns += fmt_insn("OPb %[val], %[scratch]")
            insns += fmt_insn("lock; cmpxchgb %[scratch], (%[addr])")
        elif size == 16:
            insns += fmt_insn("movw (%[addr]), %[res]")
            insns += fmt_insn("0: movw %[res], %[scratch]")
            insns += fmt_insn("OPw %[val], %[scratch]")
            insns += fmt_insn("lock; cmpxchgw %[scratch], (%[addr])")
        elif size == 32:
            insns += fmt_insn("movl (%[addr]), %[res]")
            insns += fmt_insn("0: movl %[res], %[scratch]")
            insns += fmt_insn("OPl %[val], %[scratch]")
            insns += fmt_insn("lock; cmpxchgl %[scratch], (%[addr])")
        else:
            assert size == 64
            insns += fmt_insn("movq (%[addr]), %[res]")
            insns += fmt_insn("0: movq %[res], %[scratch]")
            insns += fmt_insn("OPq %[val], %[scratch]")
            insns += fmt_insn("lock; cmpxchgq %[scratch], (%[addr])")
        insns = insns.replace("OP", op)
        insns += fmt_insn("jnz 0b")
        return """
INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
    %(cpp_type)s res, scratch;
    asm volatile (%(insns)s
                  : [res] "=&a" (res), [scratch] "=&r" (scratch)
                  : [addr] "r" (addr), [val] "r"(val)
                  : "memory", "cc");
    return res;
}""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "aarch64":
        insns = ""
        insns += fmt_insn("dmb ish")
        insns += fmt_insn("0:")
        if size == 8:
            insns += fmt_insn("ldxrb %w[res], [%x[addr]]")
            insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]")
            insns += fmt_insn("stxrb %w[scratch2], %w[scratch1], [%x[addr]]")
        elif size == 16:
            insns += fmt_insn("ldxrh %w[res], [%x[addr]]")
            insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]")
            insns += fmt_insn("stxrh %w[scratch2], %w[scratch1], [%x[addr]]")
        elif size == 32:
            insns += fmt_insn("ldxr %w[res], [%x[addr]]")
            insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]")
            insns += fmt_insn("stxr %w[scratch2], %w[scratch1], [%x[addr]]")
        else:
            assert size == 64
            insns += fmt_insn("ldxr %x[res], [%x[addr]]")
            insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]")
            insns += fmt_insn("stxr %w[scratch2], %x[scratch1], [%x[addr]]")
        cpu_op = op
        if cpu_op == "or":
            cpu_op = "orr"
        if cpu_op == "xor":
            cpu_op = "eor"
        insns = insns.replace("OP", cpu_op)
        insns += fmt_insn("cbnz %w[scratch2], 0b")
        insns += fmt_insn("dmb ish")
        return """
INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
    %(cpp_type)s res;
    uintptr_t scratch1, scratch2;
    asm volatile (%(insns)s
                  : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2)
                  : [addr] "r" (addr), [val] "r"(val)
                  : "memory", "cc");
    return res;
}""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "arm":
        insns = ""
        insns += fmt_insn("dmb sy")
        insns += fmt_insn("0:")
        if size == 8:
            insns += fmt_insn("ldrexb %[res], [%[addr]]")
            insns += fmt_insn("OP %[scratch1], %[res], %[val]")
            insns += fmt_insn("strexb %[scratch2], %[scratch1], [%[addr]]")
        elif size == 16:
            insns += fmt_insn("ldrexh %[res], [%[addr]]")
            insns += fmt_insn("OP %[scratch1], %[res], %[val]")
            insns += fmt_insn("strexh %[scratch2], %[scratch1], [%[addr]]")
        else:
            assert size == 32
            insns += fmt_insn("ldrex %[res], [%[addr]]")
            insns += fmt_insn("OP %[scratch1], %[res], %[val]")
%[scratch1], %[res], %[val]") 604 insns += fmt_insn("strex %[scratch2], %[scratch1], [%[addr]]") 605 cpu_op = op 606 if cpu_op == "or": 607 cpu_op = "orr" 608 if cpu_op == "xor": 609 cpu_op = "eor" 610 insns = insns.replace("OP", cpu_op) 611 insns += fmt_insn("cmp %[scratch2], #1") 612 insns += fmt_insn("beq 0b") 613 insns += fmt_insn("dmb sy") 614 return """ 615 INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { 616 %(cpp_type)s res; 617 uintptr_t scratch1, scratch2; 618 asm volatile (%(insns)s 619 : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) 620 : [addr] "r" (addr), [val] "r"(val) 621 : "memory", "cc"); 622 return res; 623 }""" % { 624 "cpp_type": cpp_type, 625 "fun_name": fun_name, 626 "insns": insns, 627 } 628 raise Exception("Unexpected arch") 629 630 631 def gen_pause(fun_name): 632 if cpu_arch in ("x86", "x86_64"): 633 return r""" 634 INLINE_ATTR void %(fun_name)s() { 635 asm volatile ("pause" :::); 636 }""" % { 637 "fun_name": fun_name, 638 } 639 if cpu_arch == "aarch64": 640 return r""" 641 INLINE_ATTR void %(fun_name)s() { 642 asm volatile ("isb" ::: "memory"); 643 }""" % { 644 "fun_name": fun_name, 645 } 646 if cpu_arch == "arm": 647 return r""" 648 INLINE_ATTR void %(fun_name)s() { 649 asm volatile ("yield" :::); 650 }""" % { 651 "fun_name": fun_name, 652 } 653 raise Exception("Unexpected arch") 654 655 656 def gen_copy(fun_name, cpp_type, size, unroll, direction): 657 assert direction in ("down", "up") 658 offset = 0 659 if direction == "up": 660 offset = unroll - 1 661 insns = "" 662 for i in range(unroll): 663 if cpu_arch in ("x86", "x86_64"): 664 if size == 1: 665 insns += fmt_insn("movb OFFSET(%[src]), %[scratch]") 666 insns += fmt_insn("movb %[scratch], OFFSET(%[dst])") 667 elif size == 2: 668 insns += fmt_insn("movw OFFSET(%[src]), %[scratch]") 669 insns += fmt_insn("movw %[scratch], OFFSET(%[dst])") 670 elif size == 4: 671 insns += fmt_insn("movl OFFSET(%[src]), %[scratch]") 672 insns += fmt_insn("movl %[scratch], OFFSET(%[dst])") 673 else: 674 assert size == 8 675 insns += fmt_insn("movq OFFSET(%[src]), %[scratch]") 676 insns += fmt_insn("movq %[scratch], OFFSET(%[dst])") 677 elif cpu_arch == "aarch64": 678 if size == 1: 679 insns += fmt_insn("ldrb %w[scratch], [%x[src], OFFSET]") 680 insns += fmt_insn("strb %w[scratch], [%x[dst], OFFSET]") 681 elif size == 2: 682 insns += fmt_insn("ldrh %w[scratch], [%x[src], OFFSET]") 683 insns += fmt_insn("strh %w[scratch], [%x[dst], OFFSET]") 684 elif size == 4: 685 insns += fmt_insn("ldr %w[scratch], [%x[src], OFFSET]") 686 insns += fmt_insn("str %w[scratch], [%x[dst], OFFSET]") 687 else: 688 assert size == 8 689 insns += fmt_insn("ldr %x[scratch], [%x[src], OFFSET]") 690 insns += fmt_insn("str %x[scratch], [%x[dst], OFFSET]") 691 elif cpu_arch == "arm": 692 if size == 1: 693 insns += fmt_insn("ldrb %[scratch], [%[src], #OFFSET]") 694 insns += fmt_insn("strb %[scratch], [%[dst], #OFFSET]") 695 elif size == 2: 696 insns += fmt_insn("ldrh %[scratch], [%[src], #OFFSET]") 697 insns += fmt_insn("strh %[scratch], [%[dst], #OFFSET]") 698 else: 699 assert size == 4 700 insns += fmt_insn("ldr %[scratch], [%[src], #OFFSET]") 701 insns += fmt_insn("str %[scratch], [%[dst], #OFFSET]") 702 else: 703 raise Exception("Unexpected arch") 704 insns = insns.replace("OFFSET", str(offset * size)) 705 706 if direction == "down": 707 offset += 1 708 else: 709 offset -= 1 710 711 return """ 712 INLINE_ATTR void %(fun_name)s(uint8_t* dst, const uint8_t* src) { 713 %(cpp_type)s* dst_ = 
    const %(cpp_type)s* src_ = reinterpret_cast<const %(cpp_type)s*>(src);
    %(cpp_type)s scratch;
    asm volatile (%(insns)s
                  : [scratch] "=&r" (scratch)
                  : [dst] "r" (dst_), [src] "r"(src_)
                  : "memory");
}""" % {
        "cpp_type": cpp_type,
        "fun_name": fun_name,
        "insns": insns,
    }
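

# Illustration (added comment, not part of the original script): gen_copy
# unrolls a fixed number of scalar load/store pairs. For example, on x86_64,
# gen_copy("AtomicCopyWordUnsynchronized", "uintptr_t", 8, 1, "down") as used
# in generate_atomics_header() below produces roughly:
#
#     inline void AtomicCopyWordUnsynchronized(uint8_t* dst, const uint8_t* src) {
#         uintptr_t* dst_ = reinterpret_cast<uintptr_t*>(dst);
#         const uintptr_t* src_ = reinterpret_cast<const uintptr_t*>(src);
#         uintptr_t scratch;
#         asm volatile ("movq 0(%[src]), %[scratch]\n\t"
#                       "movq %[scratch], 0(%[dst])\n\t"
#                       : [scratch] "=&r" (scratch)
#                       : [dst] "r" (dst_), [src] "r" (src_)
#                       : "memory");
#     }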


HEADER_TEMPLATE = """\
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef jit_AtomicOperationsGenerated_h
#define jit_AtomicOperationsGenerated_h

/* This file is generated by jit/GenerateAtomicOperations.py. Do not edit! */

#include "mozilla/Attributes.h"

#include <cstdint>

namespace js {
namespace jit {

%(contents)s

} // namespace jit
} // namespace js

#endif // jit_AtomicOperationsGenerated_h
"""


def generate_atomics_header(c_out):
    contents = ""
    if cpu_arch in ("x86", "x86_64", "aarch64") or (
        cpu_arch == "arm" and int(buildconfig.substs["ARM_ARCH"]) >= 7
    ):
        contents += "#define JS_HAVE_GENERATED_ATOMIC_OPS 1"

        # `fence` performs a full memory barrier.
        contents += gen_seqcst("AtomicFenceSeqCst")

        contents += gen_load("AtomicLoad8SeqCst", "uint8_t", 8, True)
        contents += gen_load("AtomicLoad16SeqCst", "uint16_t", 16, True)
        contents += gen_load("AtomicLoad32SeqCst", "uint32_t", 32, True)
        if is_64bit:
            contents += gen_load("AtomicLoad64SeqCst", "uint64_t", 64, True)

        # These are access-atomic up to sizeof(uintptr_t).
        contents += gen_load("AtomicLoad8Unsynchronized", "uint8_t", 8, False)
        contents += gen_load("AtomicLoad16Unsynchronized", "uint16_t", 16, False)
        contents += gen_load("AtomicLoad32Unsynchronized", "uint32_t", 32, False)
        if is_64bit:
            contents += gen_load("AtomicLoad64Unsynchronized", "uint64_t", 64, False)

        contents += gen_store("AtomicStore8SeqCst", "uint8_t", 8, True)
        contents += gen_store("AtomicStore16SeqCst", "uint16_t", 16, True)
        contents += gen_store("AtomicStore32SeqCst", "uint32_t", 32, True)
        if is_64bit:
            contents += gen_store("AtomicStore64SeqCst", "uint64_t", 64, True)

        # These are access-atomic up to sizeof(uintptr_t).
        contents += gen_store("AtomicStore8Unsynchronized", "uint8_t", 8, False)
        contents += gen_store("AtomicStore16Unsynchronized", "uint16_t", 16, False)
        contents += gen_store("AtomicStore32Unsynchronized", "uint32_t", 32, False)
        if is_64bit:
            contents += gen_store("AtomicStore64Unsynchronized", "uint64_t", 64, False)

        # `exchange` takes a cell address and a value. It stores it in the cell and
        # returns the value previously in the cell.
        contents += gen_exchange("AtomicExchange8SeqCst", "uint8_t", 8)
        contents += gen_exchange("AtomicExchange16SeqCst", "uint16_t", 16)
        contents += gen_exchange("AtomicExchange32SeqCst", "uint32_t", 32)
        if is_64bit:
            contents += gen_exchange("AtomicExchange64SeqCst", "uint64_t", 64)

        # `cmpxchg` takes a cell address, an expected value and a replacement value.
        # If the value in the cell equals the expected value then the replacement value
        # is stored in the cell. It always returns the value previously in the cell.
        contents += gen_cmpxchg("AtomicCmpXchg8SeqCst", "uint8_t", 8)
        contents += gen_cmpxchg("AtomicCmpXchg16SeqCst", "uint16_t", 16)
        contents += gen_cmpxchg("AtomicCmpXchg32SeqCst", "uint32_t", 32)
        contents += gen_cmpxchg("AtomicCmpXchg64SeqCst", "uint64_t", 64)

        # `add` adds a value atomically to the cell and returns the old value in the
        # cell. (There is no `sub`; just add the negated value.)
        contents += gen_fetchop("AtomicAdd8SeqCst", "uint8_t", 8, "add")
        contents += gen_fetchop("AtomicAdd16SeqCst", "uint16_t", 16, "add")
        contents += gen_fetchop("AtomicAdd32SeqCst", "uint32_t", 32, "add")
        if is_64bit:
            contents += gen_fetchop("AtomicAdd64SeqCst", "uint64_t", 64, "add")

        # `and` bitwise-ands a value atomically into the cell and returns the old value
        # in the cell.
        contents += gen_fetchop("AtomicAnd8SeqCst", "uint8_t", 8, "and")
        contents += gen_fetchop("AtomicAnd16SeqCst", "uint16_t", 16, "and")
        contents += gen_fetchop("AtomicAnd32SeqCst", "uint32_t", 32, "and")
        if is_64bit:
            contents += gen_fetchop("AtomicAnd64SeqCst", "uint64_t", 64, "and")

        # `or` bitwise-ors a value atomically into the cell and returns the old value
        # in the cell.
        contents += gen_fetchop("AtomicOr8SeqCst", "uint8_t", 8, "or")
        contents += gen_fetchop("AtomicOr16SeqCst", "uint16_t", 16, "or")
        contents += gen_fetchop("AtomicOr32SeqCst", "uint32_t", 32, "or")
        if is_64bit:
            contents += gen_fetchop("AtomicOr64SeqCst", "uint64_t", 64, "or")

        # `xor` bitwise-xors a value atomically into the cell and returns the old value
        # in the cell.
        contents += gen_fetchop("AtomicXor8SeqCst", "uint8_t", 8, "xor")
        contents += gen_fetchop("AtomicXor16SeqCst", "uint16_t", 16, "xor")
        contents += gen_fetchop("AtomicXor32SeqCst", "uint32_t", 32, "xor")
        if is_64bit:
            contents += gen_fetchop("AtomicXor64SeqCst", "uint64_t", 64, "xor")

        # Pause or yield instruction.
        contents += gen_pause("AtomicPause")

        # See comment in jit/AtomicOperations-shared-jit.cpp for an explanation.
        wordsize = 8 if is_64bit else 4
        words_in_block = 8
        blocksize = words_in_block * wordsize

        contents += gen_copy(
            "AtomicCopyUnalignedBlockDownUnsynchronized",
            "uint8_t",
            1,
            blocksize,
            "down",
        )
        contents += gen_copy(
            "AtomicCopyUnalignedBlockUpUnsynchronized", "uint8_t", 1, blocksize, "up"
        )

        contents += gen_copy(
            "AtomicCopyUnalignedWordDownUnsynchronized", "uint8_t", 1, wordsize, "down"
        )
        contents += gen_copy(
            "AtomicCopyUnalignedWordUpUnsynchronized", "uint8_t", 1, wordsize, "up"
        )

        contents += gen_copy(
            "AtomicCopyBlockDownUnsynchronized",
            "uintptr_t",
            wordsize,
            words_in_block,
            "down",
        )
        contents += gen_copy(
            "AtomicCopyBlockUpUnsynchronized",
            "uintptr_t",
            wordsize,
            words_in_block,
            "up",
        )

        contents += gen_copy(
            "AtomicCopyWordUnsynchronized", "uintptr_t", wordsize, 1, "down"
        )
        contents += gen_copy("AtomicCopy32Unsynchronized", "uint32_t", 4, 1, "down")
        contents += gen_copy("AtomicCopy16Unsynchronized", "uint16_t", 2, 1, "down")
        contents += gen_copy("AtomicCopy8Unsynchronized", "uint8_t", 1, 1, "down")

        contents += "\n"
        contents += (
            "constexpr size_t JS_GENERATED_ATOMICS_BLOCKSIZE = "
            + str(blocksize)
            + ";\n"
        )
        contents += (
            "constexpr size_t JS_GENERATED_ATOMICS_WORDSIZE = " + str(wordsize) + ";\n"
        )

        # Work around a GCC issue on 32-bit x86 by adding MOZ_NEVER_INLINE.
        # See bug 1756347.
        if is_gcc and cpu_arch == "x86":
            contents = contents.replace("INLINE_ATTR", "MOZ_NEVER_INLINE inline")
        else:
            contents = contents.replace("INLINE_ATTR", "inline")

    c_out.write(
        HEADER_TEMPLATE
        % {
            "contents": contents,
        }
    )
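

# Usage sketch (an assumption about the surrounding build, not part of the
# script itself): the build system is expected to import this module and call
# generate_atomics_header() with a writable file-like object, roughly:
#
#     import io
#     import GenerateAtomicOperations
#
#     buf = io.StringIO()
#     GenerateAtomicOperations.generate_atomics_header(buf)
#     # buf.getvalue() now holds the text of jit/AtomicOperationsGenerated.h
#
# Note that importing the module only works inside a Mozilla build environment,
# since `buildconfig` is consulted at import time to pick the target CPU.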