mpcpucache.c (23757B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #include "mpi.h" 6 #include "prtypes.h" 7 8 /* 9 * This file implements a single function: s_mpi_getProcessorLineSize(); 10 * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line 11 * if a cache exists, or zero if there is no cache. If more than one 12 * cache line exists, it should return the smallest line size (which is 13 * usually the L1 cache). 14 * 15 * mp_modexp uses this information to make sure that private key information 16 * isn't being leaked through the cache. 17 * 18 * Currently the file returns good data for most modern x86 processors, and 19 * reasonable data on 64-bit ppc processors. All other processors are assumed 20 * to have a cache line size of 32 bytes. 21 * 22 */ 23 24 #if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) 25 /* X86 processors have special instructions that tell us about the cache */ 26 #include "string.h" 27 28 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) 29 #define AMD_64 1 30 #endif 31 32 /* Generic CPUID function */ 33 #if defined(AMD_64) 34 35 #if defined(__GNUC__) 36 37 void 38 freebl_cpuid(unsigned long op, unsigned long *eax, 39 unsigned long *ebx, unsigned long *ecx, 40 unsigned long *edx) 41 { 42 __asm__("xor %%ecx, %%ecx\n\t" 43 "cpuid\n\t" 44 : "=a"(*eax), 45 "=b"(*ebx), 46 "=c"(*ecx), 47 "=d"(*edx) 48 : "0"(op)); 49 } 50 51 #elif defined(_MSC_VER) 52 53 #include <intrin.h> 54 55 void 56 freebl_cpuid(unsigned long op, unsigned long *eax, 57 unsigned long *ebx, unsigned long *ecx, 58 unsigned long *edx) 59 { 60 int intrinsic_out[4]; 61 62 __cpuid(intrinsic_out, op); 63 *eax = intrinsic_out[0]; 64 *ebx = intrinsic_out[1]; 65 *ecx = intrinsic_out[2]; 66 *edx = intrinsic_out[3]; 67 } 68 69 #endif 70 71 #else /* !defined(AMD_64) */ 72 73 /* x86 */ 74 75 #if defined(__GNUC__) 76 void 77 freebl_cpuid(unsigned long op, unsigned long *eax, 78 unsigned long *ebx, unsigned long *ecx, 79 unsigned long *edx) 80 { 81 /* Some older processors don't fill the ecx register with cpuid, so clobber it 82 * before calling cpuid, so that there's no risk of picking random bits that 83 * erroneously indicate that absent CPU features are present. 84 * Also, GCC isn't smart enough to save the ebx PIC register on its own 85 * in this case, so do it by hand. Use edi to store ebx and pass the 86 * value returned in ebx from cpuid through edi. */ 87 __asm__("xor %%ecx, %%ecx\n\t" 88 "mov %%ebx,%%edi\n\t" 89 "cpuid\n\t" 90 "xchgl %%ebx,%%edi\n\t" 91 : "=a"(*eax), 92 "=D"(*ebx), 93 "=c"(*ecx), 94 "=d"(*edx) 95 : "0"(op)); 96 } 97 98 /* 99 * try flipping a processor flag to determine CPU type 100 */ 101 static unsigned long 102 changeFlag(unsigned long flag) 103 { 104 unsigned long changedFlags, originalFlags; 105 __asm__("pushfl\n\t" /* get the flags */ 106 "popl %0\n\t" 107 "movl %0,%1\n\t" /* save the original flags */ 108 "xorl %2,%0\n\t" /* flip the bit */ 109 "pushl %0\n\t" /* set the flags */ 110 "popfl\n\t" 111 "pushfl\n\t" /* get the flags again (for return) */ 112 "popl %0\n\t" 113 "pushl %1\n\t" /* restore the original flags */ 114 "popfl\n\t" 115 : "=r"(changedFlags), 116 "=r"(originalFlags), 117 "=r"(flag) 118 : "2"(flag)); 119 return changedFlags ^ originalFlags; 120 } 121 122 #elif defined(_MSC_VER) 123 124 /* 125 * windows versions of the above assembler 126 */ 127 #define wcpuid __asm __emit 0fh __asm __emit 0a2h 128 void 129 freebl_cpuid(unsigned long op, unsigned long *Reax, 130 unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx) 131 { 132 unsigned long Leax, Lebx, Lecx, Ledx; 133 __asm { 134 pushad 135 xor ecx,ecx 136 mov eax,op 137 wcpuid 138 mov Leax,eax 139 mov Lebx,ebx 140 mov Lecx,ecx 141 mov Ledx,edx 142 popad 143 } 144 *Reax = Leax; 145 *Rebx = Lebx; 146 *Recx = Lecx; 147 *Redx = Ledx; 148 } 149 150 static unsigned long 151 changeFlag(unsigned long flag) 152 { 153 unsigned long changedFlags, originalFlags; 154 __asm { 155 push eax 156 push ebx 157 pushfd /* get the flags */ 158 pop eax 159 push eax /* save the flags on the stack */ 160 mov originalFlags,eax /* save the original flags */ 161 mov ebx,flag 162 xor eax,ebx /* flip the bit */ 163 push eax /* set the flags */ 164 popfd 165 pushfd /* get the flags again (for return) */ 166 pop eax 167 popfd /* restore the original flags */ 168 mov changedFlags,eax 169 pop ebx 170 pop eax 171 } 172 return changedFlags ^ originalFlags; 173 } 174 #endif 175 176 #endif 177 178 #if !defined(AMD_64) 179 #define AC_FLAG 0x40000 180 #define ID_FLAG 0x200000 181 182 /* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */ 183 static int 184 is386() 185 { 186 return changeFlag(AC_FLAG) == 0; 187 } 188 189 /* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */ 190 static int 191 is486() 192 { 193 return changeFlag(ID_FLAG) == 0; 194 } 195 #endif 196 197 /* 198 * table for Intel Cache. 199 * See Intel Application Note AP-485 for more information 200 */ 201 202 typedef unsigned char CacheTypeEntry; 203 204 typedef enum { 205 Cache_NONE = 0, 206 Cache_UNKNOWN = 1, 207 Cache_TLB = 2, 208 Cache_TLBi = 3, 209 Cache_TLBd = 4, 210 Cache_Trace = 5, 211 Cache_L1 = 6, 212 Cache_L1i = 7, 213 Cache_L1d = 8, 214 Cache_L2 = 9, 215 Cache_L2i = 10, 216 Cache_L2d = 11, 217 Cache_L3 = 12, 218 Cache_L3i = 13, 219 Cache_L3d = 14 220 } CacheType; 221 222 struct _cache { 223 CacheTypeEntry type; 224 unsigned char lineSize; 225 }; 226 static const struct _cache CacheMap[256] = { 227 /* 00 */ { Cache_NONE, 0 }, 228 /* 01 */ { Cache_TLBi, 0 }, 229 /* 02 */ { Cache_TLBi, 0 }, 230 /* 03 */ { Cache_TLBd, 0 }, 231 /* 04 */ { 232 Cache_TLBd, 233 }, 234 /* 05 */ { Cache_UNKNOWN, 0 }, 235 /* 06 */ { Cache_L1i, 32 }, 236 /* 07 */ { Cache_UNKNOWN, 0 }, 237 /* 08 */ { Cache_L1i, 32 }, 238 /* 09 */ { Cache_UNKNOWN, 0 }, 239 /* 0a */ { Cache_L1d, 32 }, 240 /* 0b */ { Cache_UNKNOWN, 0 }, 241 /* 0c */ { Cache_L1d, 32 }, 242 /* 0d */ { Cache_UNKNOWN, 0 }, 243 /* 0e */ { Cache_UNKNOWN, 0 }, 244 /* 0f */ { Cache_UNKNOWN, 0 }, 245 /* 10 */ { Cache_UNKNOWN, 0 }, 246 /* 11 */ { Cache_UNKNOWN, 0 }, 247 /* 12 */ { Cache_UNKNOWN, 0 }, 248 /* 13 */ { Cache_UNKNOWN, 0 }, 249 /* 14 */ { Cache_UNKNOWN, 0 }, 250 /* 15 */ { Cache_UNKNOWN, 0 }, 251 /* 16 */ { Cache_UNKNOWN, 0 }, 252 /* 17 */ { Cache_UNKNOWN, 0 }, 253 /* 18 */ { Cache_UNKNOWN, 0 }, 254 /* 19 */ { Cache_UNKNOWN, 0 }, 255 /* 1a */ { Cache_UNKNOWN, 0 }, 256 /* 1b */ { Cache_UNKNOWN, 0 }, 257 /* 1c */ { Cache_UNKNOWN, 0 }, 258 /* 1d */ { Cache_UNKNOWN, 0 }, 259 /* 1e */ { Cache_UNKNOWN, 0 }, 260 /* 1f */ { Cache_UNKNOWN, 0 }, 261 /* 20 */ { Cache_UNKNOWN, 0 }, 262 /* 21 */ { Cache_UNKNOWN, 0 }, 263 /* 22 */ { Cache_L3, 64 }, 264 /* 23 */ { Cache_L3, 64 }, 265 /* 24 */ { Cache_UNKNOWN, 0 }, 266 /* 25 */ { Cache_L3, 64 }, 267 /* 26 */ { Cache_UNKNOWN, 0 }, 268 /* 27 */ { Cache_UNKNOWN, 0 }, 269 /* 28 */ { Cache_UNKNOWN, 0 }, 270 /* 29 */ { Cache_L3, 64 }, 271 /* 2a */ { Cache_UNKNOWN, 0 }, 272 /* 2b */ { Cache_UNKNOWN, 0 }, 273 /* 2c */ { Cache_L1d, 64 }, 274 /* 2d */ { Cache_UNKNOWN, 0 }, 275 /* 2e */ { Cache_UNKNOWN, 0 }, 276 /* 2f */ { Cache_UNKNOWN, 0 }, 277 /* 30 */ { Cache_L1i, 64 }, 278 /* 31 */ { Cache_UNKNOWN, 0 }, 279 /* 32 */ { Cache_UNKNOWN, 0 }, 280 /* 33 */ { Cache_UNKNOWN, 0 }, 281 /* 34 */ { Cache_UNKNOWN, 0 }, 282 /* 35 */ { Cache_UNKNOWN, 0 }, 283 /* 36 */ { Cache_UNKNOWN, 0 }, 284 /* 37 */ { Cache_UNKNOWN, 0 }, 285 /* 38 */ { Cache_UNKNOWN, 0 }, 286 /* 39 */ { Cache_L2, 64 }, 287 /* 3a */ { Cache_UNKNOWN, 0 }, 288 /* 3b */ { Cache_L2, 64 }, 289 /* 3c */ { Cache_L2, 64 }, 290 /* 3d */ { Cache_UNKNOWN, 0 }, 291 /* 3e */ { Cache_UNKNOWN, 0 }, 292 /* 3f */ { Cache_UNKNOWN, 0 }, 293 /* 40 */ { Cache_L2, 0 }, 294 /* 41 */ { Cache_L2, 32 }, 295 /* 42 */ { Cache_L2, 32 }, 296 /* 43 */ { Cache_L2, 32 }, 297 /* 44 */ { Cache_L2, 32 }, 298 /* 45 */ { Cache_L2, 32 }, 299 /* 46 */ { Cache_UNKNOWN, 0 }, 300 /* 47 */ { Cache_UNKNOWN, 0 }, 301 /* 48 */ { Cache_UNKNOWN, 0 }, 302 /* 49 */ { Cache_UNKNOWN, 0 }, 303 /* 4a */ { Cache_UNKNOWN, 0 }, 304 /* 4b */ { Cache_UNKNOWN, 0 }, 305 /* 4c */ { Cache_UNKNOWN, 0 }, 306 /* 4d */ { Cache_UNKNOWN, 0 }, 307 /* 4e */ { Cache_UNKNOWN, 0 }, 308 /* 4f */ { Cache_UNKNOWN, 0 }, 309 /* 50 */ { Cache_TLBi, 0 }, 310 /* 51 */ { Cache_TLBi, 0 }, 311 /* 52 */ { Cache_TLBi, 0 }, 312 /* 53 */ { Cache_UNKNOWN, 0 }, 313 /* 54 */ { Cache_UNKNOWN, 0 }, 314 /* 55 */ { Cache_UNKNOWN, 0 }, 315 /* 56 */ { Cache_UNKNOWN, 0 }, 316 /* 57 */ { Cache_UNKNOWN, 0 }, 317 /* 58 */ { Cache_UNKNOWN, 0 }, 318 /* 59 */ { Cache_UNKNOWN, 0 }, 319 /* 5a */ { Cache_UNKNOWN, 0 }, 320 /* 5b */ { Cache_TLBd, 0 }, 321 /* 5c */ { Cache_TLBd, 0 }, 322 /* 5d */ { Cache_TLBd, 0 }, 323 /* 5e */ { Cache_UNKNOWN, 0 }, 324 /* 5f */ { Cache_UNKNOWN, 0 }, 325 /* 60 */ { Cache_UNKNOWN, 0 }, 326 /* 61 */ { Cache_UNKNOWN, 0 }, 327 /* 62 */ { Cache_UNKNOWN, 0 }, 328 /* 63 */ { Cache_UNKNOWN, 0 }, 329 /* 64 */ { Cache_UNKNOWN, 0 }, 330 /* 65 */ { Cache_UNKNOWN, 0 }, 331 /* 66 */ { Cache_L1d, 64 }, 332 /* 67 */ { Cache_L1d, 64 }, 333 /* 68 */ { Cache_L1d, 64 }, 334 /* 69 */ { Cache_UNKNOWN, 0 }, 335 /* 6a */ { Cache_UNKNOWN, 0 }, 336 /* 6b */ { Cache_UNKNOWN, 0 }, 337 /* 6c */ { Cache_UNKNOWN, 0 }, 338 /* 6d */ { Cache_UNKNOWN, 0 }, 339 /* 6e */ { Cache_UNKNOWN, 0 }, 340 /* 6f */ { Cache_UNKNOWN, 0 }, 341 /* 70 */ { Cache_Trace, 1 }, 342 /* 71 */ { Cache_Trace, 1 }, 343 /* 72 */ { Cache_Trace, 1 }, 344 /* 73 */ { Cache_UNKNOWN, 0 }, 345 /* 74 */ { Cache_UNKNOWN, 0 }, 346 /* 75 */ { Cache_UNKNOWN, 0 }, 347 /* 76 */ { Cache_UNKNOWN, 0 }, 348 /* 77 */ { Cache_UNKNOWN, 0 }, 349 /* 78 */ { Cache_UNKNOWN, 0 }, 350 /* 79 */ { Cache_L2, 64 }, 351 /* 7a */ { Cache_L2, 64 }, 352 /* 7b */ { Cache_L2, 64 }, 353 /* 7c */ { Cache_L2, 64 }, 354 /* 7d */ { Cache_UNKNOWN, 0 }, 355 /* 7e */ { Cache_UNKNOWN, 0 }, 356 /* 7f */ { Cache_UNKNOWN, 0 }, 357 /* 80 */ { Cache_UNKNOWN, 0 }, 358 /* 81 */ { Cache_UNKNOWN, 0 }, 359 /* 82 */ { Cache_L2, 32 }, 360 /* 83 */ { Cache_L2, 32 }, 361 /* 84 */ { Cache_L2, 32 }, 362 /* 85 */ { Cache_L2, 32 }, 363 /* 86 */ { Cache_L2, 64 }, 364 /* 87 */ { Cache_L2, 64 }, 365 /* 88 */ { Cache_UNKNOWN, 0 }, 366 /* 89 */ { Cache_UNKNOWN, 0 }, 367 /* 8a */ { Cache_UNKNOWN, 0 }, 368 /* 8b */ { Cache_UNKNOWN, 0 }, 369 /* 8c */ { Cache_UNKNOWN, 0 }, 370 /* 8d */ { Cache_UNKNOWN, 0 }, 371 /* 8e */ { Cache_UNKNOWN, 0 }, 372 /* 8f */ { Cache_UNKNOWN, 0 }, 373 /* 90 */ { Cache_UNKNOWN, 0 }, 374 /* 91 */ { Cache_UNKNOWN, 0 }, 375 /* 92 */ { Cache_UNKNOWN, 0 }, 376 /* 93 */ { Cache_UNKNOWN, 0 }, 377 /* 94 */ { Cache_UNKNOWN, 0 }, 378 /* 95 */ { Cache_UNKNOWN, 0 }, 379 /* 96 */ { Cache_UNKNOWN, 0 }, 380 /* 97 */ { Cache_UNKNOWN, 0 }, 381 /* 98 */ { Cache_UNKNOWN, 0 }, 382 /* 99 */ { Cache_UNKNOWN, 0 }, 383 /* 9a */ { Cache_UNKNOWN, 0 }, 384 /* 9b */ { Cache_UNKNOWN, 0 }, 385 /* 9c */ { Cache_UNKNOWN, 0 }, 386 /* 9d */ { Cache_UNKNOWN, 0 }, 387 /* 9e */ { Cache_UNKNOWN, 0 }, 388 /* 9f */ { Cache_UNKNOWN, 0 }, 389 /* a0 */ { Cache_UNKNOWN, 0 }, 390 /* a1 */ { Cache_UNKNOWN, 0 }, 391 /* a2 */ { Cache_UNKNOWN, 0 }, 392 /* a3 */ { Cache_UNKNOWN, 0 }, 393 /* a4 */ { Cache_UNKNOWN, 0 }, 394 /* a5 */ { Cache_UNKNOWN, 0 }, 395 /* a6 */ { Cache_UNKNOWN, 0 }, 396 /* a7 */ { Cache_UNKNOWN, 0 }, 397 /* a8 */ { Cache_UNKNOWN, 0 }, 398 /* a9 */ { Cache_UNKNOWN, 0 }, 399 /* aa */ { Cache_UNKNOWN, 0 }, 400 /* ab */ { Cache_UNKNOWN, 0 }, 401 /* ac */ { Cache_UNKNOWN, 0 }, 402 /* ad */ { Cache_UNKNOWN, 0 }, 403 /* ae */ { Cache_UNKNOWN, 0 }, 404 /* af */ { Cache_UNKNOWN, 0 }, 405 /* b0 */ { Cache_TLBi, 0 }, 406 /* b1 */ { Cache_UNKNOWN, 0 }, 407 /* b2 */ { Cache_UNKNOWN, 0 }, 408 /* b3 */ { Cache_TLBd, 0 }, 409 /* b4 */ { Cache_UNKNOWN, 0 }, 410 /* b5 */ { Cache_UNKNOWN, 0 }, 411 /* b6 */ { Cache_UNKNOWN, 0 }, 412 /* b7 */ { Cache_UNKNOWN, 0 }, 413 /* b8 */ { Cache_UNKNOWN, 0 }, 414 /* b9 */ { Cache_UNKNOWN, 0 }, 415 /* ba */ { Cache_UNKNOWN, 0 }, 416 /* bb */ { Cache_UNKNOWN, 0 }, 417 /* bc */ { Cache_UNKNOWN, 0 }, 418 /* bd */ { Cache_UNKNOWN, 0 }, 419 /* be */ { Cache_UNKNOWN, 0 }, 420 /* bf */ { Cache_UNKNOWN, 0 }, 421 /* c0 */ { Cache_UNKNOWN, 0 }, 422 /* c1 */ { Cache_UNKNOWN, 0 }, 423 /* c2 */ { Cache_UNKNOWN, 0 }, 424 /* c3 */ { Cache_UNKNOWN, 0 }, 425 /* c4 */ { Cache_UNKNOWN, 0 }, 426 /* c5 */ { Cache_UNKNOWN, 0 }, 427 /* c6 */ { Cache_UNKNOWN, 0 }, 428 /* c7 */ { Cache_UNKNOWN, 0 }, 429 /* c8 */ { Cache_UNKNOWN, 0 }, 430 /* c9 */ { Cache_UNKNOWN, 0 }, 431 /* ca */ { Cache_UNKNOWN, 0 }, 432 /* cb */ { Cache_UNKNOWN, 0 }, 433 /* cc */ { Cache_UNKNOWN, 0 }, 434 /* cd */ { Cache_UNKNOWN, 0 }, 435 /* ce */ { Cache_UNKNOWN, 0 }, 436 /* cf */ { Cache_UNKNOWN, 0 }, 437 /* d0 */ { Cache_UNKNOWN, 0 }, 438 /* d1 */ { Cache_UNKNOWN, 0 }, 439 /* d2 */ { Cache_UNKNOWN, 0 }, 440 /* d3 */ { Cache_UNKNOWN, 0 }, 441 /* d4 */ { Cache_UNKNOWN, 0 }, 442 /* d5 */ { Cache_UNKNOWN, 0 }, 443 /* d6 */ { Cache_UNKNOWN, 0 }, 444 /* d7 */ { Cache_UNKNOWN, 0 }, 445 /* d8 */ { Cache_UNKNOWN, 0 }, 446 /* d9 */ { Cache_UNKNOWN, 0 }, 447 /* da */ { Cache_UNKNOWN, 0 }, 448 /* db */ { Cache_UNKNOWN, 0 }, 449 /* dc */ { Cache_UNKNOWN, 0 }, 450 /* dd */ { Cache_UNKNOWN, 0 }, 451 /* de */ { Cache_UNKNOWN, 0 }, 452 /* df */ { Cache_UNKNOWN, 0 }, 453 /* e0 */ { Cache_UNKNOWN, 0 }, 454 /* e1 */ { Cache_UNKNOWN, 0 }, 455 /* e2 */ { Cache_UNKNOWN, 0 }, 456 /* e3 */ { Cache_UNKNOWN, 0 }, 457 /* e4 */ { Cache_UNKNOWN, 0 }, 458 /* e5 */ { Cache_UNKNOWN, 0 }, 459 /* e6 */ { Cache_UNKNOWN, 0 }, 460 /* e7 */ { Cache_UNKNOWN, 0 }, 461 /* e8 */ { Cache_UNKNOWN, 0 }, 462 /* e9 */ { Cache_UNKNOWN, 0 }, 463 /* ea */ { Cache_UNKNOWN, 0 }, 464 /* eb */ { Cache_UNKNOWN, 0 }, 465 /* ec */ { Cache_UNKNOWN, 0 }, 466 /* ed */ { Cache_UNKNOWN, 0 }, 467 /* ee */ { Cache_UNKNOWN, 0 }, 468 /* ef */ { Cache_UNKNOWN, 0 }, 469 /* f0 */ { Cache_UNKNOWN, 0 }, 470 /* f1 */ { Cache_UNKNOWN, 0 }, 471 /* f2 */ { Cache_UNKNOWN, 0 }, 472 /* f3 */ { Cache_UNKNOWN, 0 }, 473 /* f4 */ { Cache_UNKNOWN, 0 }, 474 /* f5 */ { Cache_UNKNOWN, 0 }, 475 /* f6 */ { Cache_UNKNOWN, 0 }, 476 /* f7 */ { Cache_UNKNOWN, 0 }, 477 /* f8 */ { Cache_UNKNOWN, 0 }, 478 /* f9 */ { Cache_UNKNOWN, 0 }, 479 /* fa */ { Cache_UNKNOWN, 0 }, 480 /* fb */ { Cache_UNKNOWN, 0 }, 481 /* fc */ { Cache_UNKNOWN, 0 }, 482 /* fd */ { Cache_UNKNOWN, 0 }, 483 /* fe */ { Cache_UNKNOWN, 0 }, 484 /* ff */ { Cache_UNKNOWN, 0 } 485 }; 486 487 /* 488 * use the above table to determine the CacheEntryLineSize. 489 */ 490 static void 491 getIntelCacheEntryLineSize(unsigned long val, int *level, 492 unsigned long *lineSize) 493 { 494 CacheType type; 495 496 type = CacheMap[val].type; 497 /* only interested in data caches */ 498 /* NOTE val = 0x40 is a special value that means no L2 or L3 cache. 499 * this data check has the side effect of rejecting that entry. If 500 * that wasn't the case, we could have to reject it explicitly */ 501 if (CacheMap[val].lineSize == 0) { 502 return; 503 } 504 /* look at the caches, skip types we aren't interested in. 505 * if we already have a value for a lower level cache, skip the 506 * current entry */ 507 if ((type == Cache_L1) || (type == Cache_L1d)) { 508 *level = 1; 509 *lineSize = CacheMap[val].lineSize; 510 } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) { 511 *level = 2; 512 *lineSize = CacheMap[val].lineSize; 513 } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) { 514 *level = 3; 515 *lineSize = CacheMap[val].lineSize; 516 } 517 return; 518 } 519 520 static void 521 getIntelRegisterCacheLineSize(unsigned long val, 522 int *level, unsigned long *lineSize) 523 { 524 getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize); 525 getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize); 526 getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize); 527 getIntelCacheEntryLineSize(val & 0xff, level, lineSize); 528 } 529 530 /* 531 * returns '0' if no recognized cache is found, or if the cache 532 * information is supported by this processor 533 */ 534 static unsigned long 535 getIntelCacheLineSize(int cpuidLevel) 536 { 537 int level = 4; 538 unsigned long lineSize = 0; 539 unsigned long eax, ebx, ecx, edx; 540 int repeat, count; 541 542 if (cpuidLevel < 2) { 543 return 0; 544 } 545 546 /* command '2' of the cpuid is intel's cache info call. Each byte of the 547 * 4 registers contain a potential descriptor for the cache. The CacheMap 548 * table maps the cache entry with the processor cache. Register 'al' 549 * contains a count value that cpuid '2' needs to be called in order to 550 * find all the cache descriptors. Only registers with the high bit set 551 * to 'zero' have valid descriptors. This code loops through all the 552 * required calls to cpuid '2' and passes any valid descriptors it finds 553 * to the getIntelRegisterCacheLineSize code, which breaks the registers 554 * down into their component descriptors. In the end the lineSize of the 555 * lowest level cache data cache is returned. */ 556 freebl_cpuid(2, &eax, &ebx, &ecx, &edx); 557 repeat = eax & 0xf; 558 for (count = 0; count < repeat; count++) { 559 if ((eax & 0x80000000) == 0) { 560 getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize); 561 } 562 if ((ebx & 0x80000000) == 0) { 563 getIntelRegisterCacheLineSize(ebx, &level, &lineSize); 564 } 565 if ((ecx & 0x80000000) == 0) { 566 getIntelRegisterCacheLineSize(ecx, &level, &lineSize); 567 } 568 if ((edx & 0x80000000) == 0) { 569 getIntelRegisterCacheLineSize(edx, &level, &lineSize); 570 } 571 if (count + 1 != repeat) { 572 freebl_cpuid(2, &eax, &ebx, &ecx, &edx); 573 } 574 } 575 return lineSize; 576 } 577 578 /* 579 * returns '0' if the cache info is not supported by this processor. 580 * This is based on the AMD extended cache commands for cpuid. 581 * (see "AMD Processor Recognition Application Note" Publication 20734). 582 * Some other processors use the identical scheme. 583 * (see "Processor Recognition, Transmeta Corporation"). 584 */ 585 static unsigned long 586 getOtherCacheLineSize(unsigned long cpuidLevel) 587 { 588 unsigned long lineSize = 0; 589 unsigned long eax, ebx, ecx, edx; 590 591 /* get the Extended CPUID level */ 592 freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx); 593 cpuidLevel = eax; 594 595 if (cpuidLevel >= 0x80000005) { 596 freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx); 597 lineSize = ecx & 0xff; /* line Size, L1 Data Cache */ 598 } 599 return lineSize; 600 } 601 602 static const char *const manMap[] = { 603 #define INTEL 0 604 "GenuineIntel", 605 #define AMD 1 606 "AuthenticAMD", 607 #define CYRIX 2 608 "CyrixInstead", 609 #define CENTAUR 2 610 "CentaurHauls", 611 #define NEXGEN 3 612 "NexGenDriven", 613 #define TRANSMETA 4 614 "GenuineTMx86", 615 #define RISE 5 616 "RiseRiseRise", 617 #define UMC 6 618 "UMC UMC UMC ", 619 #define SIS 7 620 "Sis Sis Sis ", 621 #define NATIONAL 8 622 "Geode by NSC", 623 }; 624 625 static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]); 626 627 #define MAN_UNKNOWN 9 628 629 #if !defined(AMD_64) 630 #define SSE2_FLAG (1 << 26) 631 unsigned long 632 s_mpi_is_sse2() 633 { 634 unsigned long eax, ebx, ecx, edx; 635 636 if (is386() || is486()) { 637 return 0; 638 } 639 freebl_cpuid(0, &eax, &ebx, &ecx, &edx); 640 641 /* has no SSE2 extensions */ 642 if (eax == 0) { 643 return 0; 644 } 645 646 freebl_cpuid(1, &eax, &ebx, &ecx, &edx); 647 return (edx & SSE2_FLAG) == SSE2_FLAG; 648 } 649 #endif 650 651 unsigned long 652 s_mpi_getProcessorLineSize() 653 { 654 unsigned long eax, ebx, ecx, edx; 655 PRUint32 cpuid[3]; 656 unsigned long cpuidLevel; 657 unsigned long cacheLineSize = 0; 658 int manufacturer = MAN_UNKNOWN; 659 int i; 660 char string[13]; 661 662 #if !defined(AMD_64) 663 if (is386()) { 664 return 0; /* 386 had no cache */ 665 } 666 if (is486()) { 667 return 32; /* really? need more info */ 668 } 669 #endif 670 671 /* Pentium, cpuid command is available */ 672 freebl_cpuid(0, &eax, &ebx, &ecx, &edx); 673 cpuidLevel = eax; 674 /* string holds the CPU's manufacturer ID string - a twelve 675 * character ASCII string stored in ebx, edx, ecx, and 676 * the 32-bit extended feature flags are in edx, ecx. 677 */ 678 cpuid[0] = ebx; 679 cpuid[1] = ecx; 680 cpuid[2] = edx; 681 memcpy(string, cpuid, sizeof(cpuid)); 682 string[12] = 0; 683 684 manufacturer = MAN_UNKNOWN; 685 for (i = 0; i < n_manufacturers; i++) { 686 if (strcmp(manMap[i], string) == 0) { 687 manufacturer = i; 688 } 689 } 690 691 if (manufacturer == INTEL) { 692 cacheLineSize = getIntelCacheLineSize(cpuidLevel); 693 } else { 694 cacheLineSize = getOtherCacheLineSize(cpuidLevel); 695 } 696 /* doesn't support cache info based on cpuid. This means 697 * an old pentium class processor, which have cache lines of 698 * 32. If we learn differently, we can use a switch based on 699 * the Manufacturer id */ 700 if (cacheLineSize == 0) { 701 cacheLineSize = 32; 702 } 703 return cacheLineSize; 704 } 705 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 706 #endif 707 708 #if defined(__ppc64__) 709 /* 710 * Sigh, The PPC has some really nice features to help us determine cache 711 * size, since it had lots of direct control functions to do so. The POWER 712 * processor even has an instruction to do this, but it was dropped in 713 * PowerPC. Unfortunately most of them are not available in user mode. 714 * 715 * The dcbz function would be a great way to determine cache line size except 716 * 1) it only works on write-back memory (it throws an exception otherwise), 717 * and 2) because so many mac programs 'knew' the processor cache size was 718 * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new 719 * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep 720 * these programs happy. dcbzl work if 64 bit instructions are supported. 721 * If you know 64 bit instructions are supported, and that stack is 722 * write-back, you can use this code. 723 */ 724 #include "memory.h" 725 726 /* clear the cache line that contains 'array' */ 727 static inline void 728 dcbzl(char *array) 729 { 730 __asm__("dcbzl %0, %1" 731 : /*no result*/ 732 : "b%"(array), "r"(0) 733 : "memory"); 734 } 735 736 #define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1))) 737 738 #define PPC_MAX_LINE_SIZE 256 739 unsigned long 740 s_mpi_getProcessorLineSize() 741 { 742 char testArray[2 * PPC_MAX_LINE_SIZE + 1]; 743 char *test; 744 int i; 745 746 /* align the array on a maximum line size boundary, so we 747 * know we are starting to clear from the first address */ 748 test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE); 749 /* set all the values to 1's */ 750 memset(test, 0xff, PPC_MAX_LINE_SIZE); 751 /* clear one cache block starting at 'test' */ 752 dcbzl(test); 753 754 /* find the size of the cleared area, that's our block size */ 755 for (i = PPC_MAX_LINE_SIZE; i != 0; i = i / 2) { 756 if (test[i - 1] == 0) { 757 return i; 758 } 759 } 760 return 0; 761 } 762 763 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 764 #endif 765 766 /* 767 * put other processor and platform specific cache code here 768 * return the smallest cache line size in bytes on the processor 769 * (usually the L1 cache). If the OS has a call, this would be 770 * a greate place to put it. 771 * 772 * If there is no cache, return 0; 773 * 774 * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions 775 * below aren't compiled. 776 * 777 */ 778 779 /* If no way to get the processor cache line size has been defined, assume 780 * it's 32 bytes (most common value, does not significantly impact performance) 781 */ 782 #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 783 unsigned long 784 s_mpi_getProcessorLineSize() 785 { 786 return 32; 787 } 788 #endif