tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

mpcpucache.c (23757B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 #include "mpi.h"
      6 #include "prtypes.h"
      7 
      8 /*
      9 * This file implements a single function: s_mpi_getProcessorLineSize();
     10 * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
     11 * if a cache exists, or zero if there is no cache. If more than one
     12 * cache line exists, it should return the smallest line size (which is
     13 * usually the L1 cache).
     14 *
     15 * mp_modexp uses this information to make sure that private key information
     16 * isn't being leaked through the cache.
     17 *
     18 * Currently the file returns good data for most modern x86 processors, and
     19 * reasonable data on 64-bit ppc processors. All other processors are assumed
     20 * to have a cache line size of 32 bytes.
     21 *
     22 */
     23 
     24 #if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
     25 /* X86 processors have special instructions that tell us about the cache */
     26 #include "string.h"
     27 
     28 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
     29 #define AMD_64 1
     30 #endif
     31 
     32 /* Generic CPUID function */
     33 #if defined(AMD_64)
     34 
     35 #if defined(__GNUC__)
     36 
     37 void
     38 freebl_cpuid(unsigned long op, unsigned long *eax,
     39             unsigned long *ebx, unsigned long *ecx,
     40             unsigned long *edx)
     41 {
     42    __asm__("xor %%ecx, %%ecx\n\t"
     43            "cpuid\n\t"
     44            : "=a"(*eax),
     45              "=b"(*ebx),
     46              "=c"(*ecx),
     47              "=d"(*edx)
     48            : "0"(op));
     49 }
     50 
     51 #elif defined(_MSC_VER)
     52 
     53 #include <intrin.h>
     54 
     55 void
     56 freebl_cpuid(unsigned long op, unsigned long *eax,
     57             unsigned long *ebx, unsigned long *ecx,
     58             unsigned long *edx)
     59 {
     60    int intrinsic_out[4];
     61 
     62    __cpuid(intrinsic_out, op);
     63    *eax = intrinsic_out[0];
     64    *ebx = intrinsic_out[1];
     65    *ecx = intrinsic_out[2];
     66    *edx = intrinsic_out[3];
     67 }
     68 
     69 #endif
     70 
     71 #else /* !defined(AMD_64) */
     72 
     73 /* x86 */
     74 
     75 #if defined(__GNUC__)
     76 void
     77 freebl_cpuid(unsigned long op, unsigned long *eax,
     78             unsigned long *ebx, unsigned long *ecx,
     79             unsigned long *edx)
     80 {
     81    /* Some older processors don't fill the ecx register with cpuid, so clobber it
     82     * before calling cpuid, so that there's no risk of picking random bits that
     83     * erroneously indicate that absent CPU features are present.
     84     * Also, GCC isn't smart enough to save the ebx PIC register on its own
     85     * in this case, so do it by hand. Use edi to store ebx and pass the
     86     * value returned in ebx from cpuid through edi. */
     87    __asm__("xor %%ecx, %%ecx\n\t"
     88            "mov %%ebx,%%edi\n\t"
     89            "cpuid\n\t"
     90            "xchgl %%ebx,%%edi\n\t"
     91            : "=a"(*eax),
     92              "=D"(*ebx),
     93              "=c"(*ecx),
     94              "=d"(*edx)
     95            : "0"(op));
     96 }
     97 
     98 /*
     99 * try flipping a processor flag to determine CPU type
    100 */
    101 static unsigned long
    102 changeFlag(unsigned long flag)
    103 {
    104    unsigned long changedFlags, originalFlags;
    105    __asm__("pushfl\n\t" /* get the flags */
    106            "popl %0\n\t"
    107            "movl %0,%1\n\t" /* save the original flags */
    108            "xorl %2,%0\n\t" /* flip the bit */
    109            "pushl %0\n\t"   /* set the flags */
    110            "popfl\n\t"
    111            "pushfl\n\t" /* get the flags again (for return) */
    112            "popl %0\n\t"
    113            "pushl %1\n\t" /* restore the original flags */
    114            "popfl\n\t"
    115            : "=r"(changedFlags),
    116              "=r"(originalFlags),
    117              "=r"(flag)
    118            : "2"(flag));
    119    return changedFlags ^ originalFlags;
    120 }
    121 
    122 #elif defined(_MSC_VER)
    123 
    124 /*
    125 * windows versions of the above assembler
    126 */
    127 #define wcpuid __asm __emit 0fh __asm __emit 0a2h
    128 void
    129 freebl_cpuid(unsigned long op, unsigned long *Reax,
    130             unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
    131 {
    132    unsigned long Leax, Lebx, Lecx, Ledx;
    133    __asm {
    134        pushad
    135        xor     ecx,ecx
    136        mov     eax,op
    137        wcpuid
    138        mov     Leax,eax
    139        mov     Lebx,ebx
    140        mov     Lecx,ecx
    141        mov     Ledx,edx
    142        popad
    143    }
    144    *Reax = Leax;
    145    *Rebx = Lebx;
    146    *Recx = Lecx;
    147    *Redx = Ledx;
    148 }
    149 
    150 static unsigned long
    151 changeFlag(unsigned long flag)
    152 {
    153    unsigned long changedFlags, originalFlags;
    154    __asm {
    155        push eax
    156        push ebx
    157        pushfd /* get the flags */
    158            pop  eax
    159        push eax /* save the flags on the stack */
    160            mov  originalFlags,eax /* save the original flags */
    161        mov  ebx,flag
    162            xor  eax,ebx /* flip the bit */
    163        push eax /* set the flags */
    164            popfd
    165        pushfd /* get the flags again (for return) */
    166        pop  eax
    167        popfd /* restore the original flags */
    168        mov changedFlags,eax
    169        pop ebx
    170        pop eax
    171    }
    172    return changedFlags ^ originalFlags;
    173 }
    174 #endif
    175 
    176 #endif
    177 
    178 #if !defined(AMD_64)
    179 #define AC_FLAG 0x40000
    180 #define ID_FLAG 0x200000
    181 
    182 /* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */
    183 static int
    184 is386()
    185 {
    186    return changeFlag(AC_FLAG) == 0;
    187 }
    188 
    189 /* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
    190 static int
    191 is486()
    192 {
    193    return changeFlag(ID_FLAG) == 0;
    194 }
    195 #endif
    196 
    197 /*
    198 * table for Intel Cache.
    199 * See Intel Application Note AP-485 for more information
    200 */
    201 
    202 typedef unsigned char CacheTypeEntry;
    203 
    204 typedef enum {
    205    Cache_NONE = 0,
    206    Cache_UNKNOWN = 1,
    207    Cache_TLB = 2,
    208    Cache_TLBi = 3,
    209    Cache_TLBd = 4,
    210    Cache_Trace = 5,
    211    Cache_L1 = 6,
    212    Cache_L1i = 7,
    213    Cache_L1d = 8,
    214    Cache_L2 = 9,
    215    Cache_L2i = 10,
    216    Cache_L2d = 11,
    217    Cache_L3 = 12,
    218    Cache_L3i = 13,
    219    Cache_L3d = 14
    220 } CacheType;
    221 
    222 struct _cache {
    223    CacheTypeEntry type;
    224    unsigned char lineSize;
    225 };
    226 static const struct _cache CacheMap[256] = {
    227    /* 00 */ { Cache_NONE, 0 },
    228    /* 01 */ { Cache_TLBi, 0 },
    229    /* 02 */ { Cache_TLBi, 0 },
    230    /* 03 */ { Cache_TLBd, 0 },
    231    /* 04 */ {
    232        Cache_TLBd,
    233    },
    234    /* 05 */ { Cache_UNKNOWN, 0 },
    235    /* 06 */ { Cache_L1i, 32 },
    236    /* 07 */ { Cache_UNKNOWN, 0 },
    237    /* 08 */ { Cache_L1i, 32 },
    238    /* 09 */ { Cache_UNKNOWN, 0 },
    239    /* 0a */ { Cache_L1d, 32 },
    240    /* 0b */ { Cache_UNKNOWN, 0 },
    241    /* 0c */ { Cache_L1d, 32 },
    242    /* 0d */ { Cache_UNKNOWN, 0 },
    243    /* 0e */ { Cache_UNKNOWN, 0 },
    244    /* 0f */ { Cache_UNKNOWN, 0 },
    245    /* 10 */ { Cache_UNKNOWN, 0 },
    246    /* 11 */ { Cache_UNKNOWN, 0 },
    247    /* 12 */ { Cache_UNKNOWN, 0 },
    248    /* 13 */ { Cache_UNKNOWN, 0 },
    249    /* 14 */ { Cache_UNKNOWN, 0 },
    250    /* 15 */ { Cache_UNKNOWN, 0 },
    251    /* 16 */ { Cache_UNKNOWN, 0 },
    252    /* 17 */ { Cache_UNKNOWN, 0 },
    253    /* 18 */ { Cache_UNKNOWN, 0 },
    254    /* 19 */ { Cache_UNKNOWN, 0 },
    255    /* 1a */ { Cache_UNKNOWN, 0 },
    256    /* 1b */ { Cache_UNKNOWN, 0 },
    257    /* 1c */ { Cache_UNKNOWN, 0 },
    258    /* 1d */ { Cache_UNKNOWN, 0 },
    259    /* 1e */ { Cache_UNKNOWN, 0 },
    260    /* 1f */ { Cache_UNKNOWN, 0 },
    261    /* 20 */ { Cache_UNKNOWN, 0 },
    262    /* 21 */ { Cache_UNKNOWN, 0 },
    263    /* 22 */ { Cache_L3, 64 },
    264    /* 23 */ { Cache_L3, 64 },
    265    /* 24 */ { Cache_UNKNOWN, 0 },
    266    /* 25 */ { Cache_L3, 64 },
    267    /* 26 */ { Cache_UNKNOWN, 0 },
    268    /* 27 */ { Cache_UNKNOWN, 0 },
    269    /* 28 */ { Cache_UNKNOWN, 0 },
    270    /* 29 */ { Cache_L3, 64 },
    271    /* 2a */ { Cache_UNKNOWN, 0 },
    272    /* 2b */ { Cache_UNKNOWN, 0 },
    273    /* 2c */ { Cache_L1d, 64 },
    274    /* 2d */ { Cache_UNKNOWN, 0 },
    275    /* 2e */ { Cache_UNKNOWN, 0 },
    276    /* 2f */ { Cache_UNKNOWN, 0 },
    277    /* 30 */ { Cache_L1i, 64 },
    278    /* 31 */ { Cache_UNKNOWN, 0 },
    279    /* 32 */ { Cache_UNKNOWN, 0 },
    280    /* 33 */ { Cache_UNKNOWN, 0 },
    281    /* 34 */ { Cache_UNKNOWN, 0 },
    282    /* 35 */ { Cache_UNKNOWN, 0 },
    283    /* 36 */ { Cache_UNKNOWN, 0 },
    284    /* 37 */ { Cache_UNKNOWN, 0 },
    285    /* 38 */ { Cache_UNKNOWN, 0 },
    286    /* 39 */ { Cache_L2, 64 },
    287    /* 3a */ { Cache_UNKNOWN, 0 },
    288    /* 3b */ { Cache_L2, 64 },
    289    /* 3c */ { Cache_L2, 64 },
    290    /* 3d */ { Cache_UNKNOWN, 0 },
    291    /* 3e */ { Cache_UNKNOWN, 0 },
    292    /* 3f */ { Cache_UNKNOWN, 0 },
    293    /* 40 */ { Cache_L2, 0 },
    294    /* 41 */ { Cache_L2, 32 },
    295    /* 42 */ { Cache_L2, 32 },
    296    /* 43 */ { Cache_L2, 32 },
    297    /* 44 */ { Cache_L2, 32 },
    298    /* 45 */ { Cache_L2, 32 },
    299    /* 46 */ { Cache_UNKNOWN, 0 },
    300    /* 47 */ { Cache_UNKNOWN, 0 },
    301    /* 48 */ { Cache_UNKNOWN, 0 },
    302    /* 49 */ { Cache_UNKNOWN, 0 },
    303    /* 4a */ { Cache_UNKNOWN, 0 },
    304    /* 4b */ { Cache_UNKNOWN, 0 },
    305    /* 4c */ { Cache_UNKNOWN, 0 },
    306    /* 4d */ { Cache_UNKNOWN, 0 },
    307    /* 4e */ { Cache_UNKNOWN, 0 },
    308    /* 4f */ { Cache_UNKNOWN, 0 },
    309    /* 50 */ { Cache_TLBi, 0 },
    310    /* 51 */ { Cache_TLBi, 0 },
    311    /* 52 */ { Cache_TLBi, 0 },
    312    /* 53 */ { Cache_UNKNOWN, 0 },
    313    /* 54 */ { Cache_UNKNOWN, 0 },
    314    /* 55 */ { Cache_UNKNOWN, 0 },
    315    /* 56 */ { Cache_UNKNOWN, 0 },
    316    /* 57 */ { Cache_UNKNOWN, 0 },
    317    /* 58 */ { Cache_UNKNOWN, 0 },
    318    /* 59 */ { Cache_UNKNOWN, 0 },
    319    /* 5a */ { Cache_UNKNOWN, 0 },
    320    /* 5b */ { Cache_TLBd, 0 },
    321    /* 5c */ { Cache_TLBd, 0 },
    322    /* 5d */ { Cache_TLBd, 0 },
    323    /* 5e */ { Cache_UNKNOWN, 0 },
    324    /* 5f */ { Cache_UNKNOWN, 0 },
    325    /* 60 */ { Cache_UNKNOWN, 0 },
    326    /* 61 */ { Cache_UNKNOWN, 0 },
    327    /* 62 */ { Cache_UNKNOWN, 0 },
    328    /* 63 */ { Cache_UNKNOWN, 0 },
    329    /* 64 */ { Cache_UNKNOWN, 0 },
    330    /* 65 */ { Cache_UNKNOWN, 0 },
    331    /* 66 */ { Cache_L1d, 64 },
    332    /* 67 */ { Cache_L1d, 64 },
    333    /* 68 */ { Cache_L1d, 64 },
    334    /* 69 */ { Cache_UNKNOWN, 0 },
    335    /* 6a */ { Cache_UNKNOWN, 0 },
    336    /* 6b */ { Cache_UNKNOWN, 0 },
    337    /* 6c */ { Cache_UNKNOWN, 0 },
    338    /* 6d */ { Cache_UNKNOWN, 0 },
    339    /* 6e */ { Cache_UNKNOWN, 0 },
    340    /* 6f */ { Cache_UNKNOWN, 0 },
    341    /* 70 */ { Cache_Trace, 1 },
    342    /* 71 */ { Cache_Trace, 1 },
    343    /* 72 */ { Cache_Trace, 1 },
    344    /* 73 */ { Cache_UNKNOWN, 0 },
    345    /* 74 */ { Cache_UNKNOWN, 0 },
    346    /* 75 */ { Cache_UNKNOWN, 0 },
    347    /* 76 */ { Cache_UNKNOWN, 0 },
    348    /* 77 */ { Cache_UNKNOWN, 0 },
    349    /* 78 */ { Cache_UNKNOWN, 0 },
    350    /* 79 */ { Cache_L2, 64 },
    351    /* 7a */ { Cache_L2, 64 },
    352    /* 7b */ { Cache_L2, 64 },
    353    /* 7c */ { Cache_L2, 64 },
    354    /* 7d */ { Cache_UNKNOWN, 0 },
    355    /* 7e */ { Cache_UNKNOWN, 0 },
    356    /* 7f */ { Cache_UNKNOWN, 0 },
    357    /* 80 */ { Cache_UNKNOWN, 0 },
    358    /* 81 */ { Cache_UNKNOWN, 0 },
    359    /* 82 */ { Cache_L2, 32 },
    360    /* 83 */ { Cache_L2, 32 },
    361    /* 84 */ { Cache_L2, 32 },
    362    /* 85 */ { Cache_L2, 32 },
    363    /* 86 */ { Cache_L2, 64 },
    364    /* 87 */ { Cache_L2, 64 },
    365    /* 88 */ { Cache_UNKNOWN, 0 },
    366    /* 89 */ { Cache_UNKNOWN, 0 },
    367    /* 8a */ { Cache_UNKNOWN, 0 },
    368    /* 8b */ { Cache_UNKNOWN, 0 },
    369    /* 8c */ { Cache_UNKNOWN, 0 },
    370    /* 8d */ { Cache_UNKNOWN, 0 },
    371    /* 8e */ { Cache_UNKNOWN, 0 },
    372    /* 8f */ { Cache_UNKNOWN, 0 },
    373    /* 90 */ { Cache_UNKNOWN, 0 },
    374    /* 91 */ { Cache_UNKNOWN, 0 },
    375    /* 92 */ { Cache_UNKNOWN, 0 },
    376    /* 93 */ { Cache_UNKNOWN, 0 },
    377    /* 94 */ { Cache_UNKNOWN, 0 },
    378    /* 95 */ { Cache_UNKNOWN, 0 },
    379    /* 96 */ { Cache_UNKNOWN, 0 },
    380    /* 97 */ { Cache_UNKNOWN, 0 },
    381    /* 98 */ { Cache_UNKNOWN, 0 },
    382    /* 99 */ { Cache_UNKNOWN, 0 },
    383    /* 9a */ { Cache_UNKNOWN, 0 },
    384    /* 9b */ { Cache_UNKNOWN, 0 },
    385    /* 9c */ { Cache_UNKNOWN, 0 },
    386    /* 9d */ { Cache_UNKNOWN, 0 },
    387    /* 9e */ { Cache_UNKNOWN, 0 },
    388    /* 9f */ { Cache_UNKNOWN, 0 },
    389    /* a0 */ { Cache_UNKNOWN, 0 },
    390    /* a1 */ { Cache_UNKNOWN, 0 },
    391    /* a2 */ { Cache_UNKNOWN, 0 },
    392    /* a3 */ { Cache_UNKNOWN, 0 },
    393    /* a4 */ { Cache_UNKNOWN, 0 },
    394    /* a5 */ { Cache_UNKNOWN, 0 },
    395    /* a6 */ { Cache_UNKNOWN, 0 },
    396    /* a7 */ { Cache_UNKNOWN, 0 },
    397    /* a8 */ { Cache_UNKNOWN, 0 },
    398    /* a9 */ { Cache_UNKNOWN, 0 },
    399    /* aa */ { Cache_UNKNOWN, 0 },
    400    /* ab */ { Cache_UNKNOWN, 0 },
    401    /* ac */ { Cache_UNKNOWN, 0 },
    402    /* ad */ { Cache_UNKNOWN, 0 },
    403    /* ae */ { Cache_UNKNOWN, 0 },
    404    /* af */ { Cache_UNKNOWN, 0 },
    405    /* b0 */ { Cache_TLBi, 0 },
    406    /* b1 */ { Cache_UNKNOWN, 0 },
    407    /* b2 */ { Cache_UNKNOWN, 0 },
    408    /* b3 */ { Cache_TLBd, 0 },
    409    /* b4 */ { Cache_UNKNOWN, 0 },
    410    /* b5 */ { Cache_UNKNOWN, 0 },
    411    /* b6 */ { Cache_UNKNOWN, 0 },
    412    /* b7 */ { Cache_UNKNOWN, 0 },
    413    /* b8 */ { Cache_UNKNOWN, 0 },
    414    /* b9 */ { Cache_UNKNOWN, 0 },
    415    /* ba */ { Cache_UNKNOWN, 0 },
    416    /* bb */ { Cache_UNKNOWN, 0 },
    417    /* bc */ { Cache_UNKNOWN, 0 },
    418    /* bd */ { Cache_UNKNOWN, 0 },
    419    /* be */ { Cache_UNKNOWN, 0 },
    420    /* bf */ { Cache_UNKNOWN, 0 },
    421    /* c0 */ { Cache_UNKNOWN, 0 },
    422    /* c1 */ { Cache_UNKNOWN, 0 },
    423    /* c2 */ { Cache_UNKNOWN, 0 },
    424    /* c3 */ { Cache_UNKNOWN, 0 },
    425    /* c4 */ { Cache_UNKNOWN, 0 },
    426    /* c5 */ { Cache_UNKNOWN, 0 },
    427    /* c6 */ { Cache_UNKNOWN, 0 },
    428    /* c7 */ { Cache_UNKNOWN, 0 },
    429    /* c8 */ { Cache_UNKNOWN, 0 },
    430    /* c9 */ { Cache_UNKNOWN, 0 },
    431    /* ca */ { Cache_UNKNOWN, 0 },
    432    /* cb */ { Cache_UNKNOWN, 0 },
    433    /* cc */ { Cache_UNKNOWN, 0 },
    434    /* cd */ { Cache_UNKNOWN, 0 },
    435    /* ce */ { Cache_UNKNOWN, 0 },
    436    /* cf */ { Cache_UNKNOWN, 0 },
    437    /* d0 */ { Cache_UNKNOWN, 0 },
    438    /* d1 */ { Cache_UNKNOWN, 0 },
    439    /* d2 */ { Cache_UNKNOWN, 0 },
    440    /* d3 */ { Cache_UNKNOWN, 0 },
    441    /* d4 */ { Cache_UNKNOWN, 0 },
    442    /* d5 */ { Cache_UNKNOWN, 0 },
    443    /* d6 */ { Cache_UNKNOWN, 0 },
    444    /* d7 */ { Cache_UNKNOWN, 0 },
    445    /* d8 */ { Cache_UNKNOWN, 0 },
    446    /* d9 */ { Cache_UNKNOWN, 0 },
    447    /* da */ { Cache_UNKNOWN, 0 },
    448    /* db */ { Cache_UNKNOWN, 0 },
    449    /* dc */ { Cache_UNKNOWN, 0 },
    450    /* dd */ { Cache_UNKNOWN, 0 },
    451    /* de */ { Cache_UNKNOWN, 0 },
    452    /* df */ { Cache_UNKNOWN, 0 },
    453    /* e0 */ { Cache_UNKNOWN, 0 },
    454    /* e1 */ { Cache_UNKNOWN, 0 },
    455    /* e2 */ { Cache_UNKNOWN, 0 },
    456    /* e3 */ { Cache_UNKNOWN, 0 },
    457    /* e4 */ { Cache_UNKNOWN, 0 },
    458    /* e5 */ { Cache_UNKNOWN, 0 },
    459    /* e6 */ { Cache_UNKNOWN, 0 },
    460    /* e7 */ { Cache_UNKNOWN, 0 },
    461    /* e8 */ { Cache_UNKNOWN, 0 },
    462    /* e9 */ { Cache_UNKNOWN, 0 },
    463    /* ea */ { Cache_UNKNOWN, 0 },
    464    /* eb */ { Cache_UNKNOWN, 0 },
    465    /* ec */ { Cache_UNKNOWN, 0 },
    466    /* ed */ { Cache_UNKNOWN, 0 },
    467    /* ee */ { Cache_UNKNOWN, 0 },
    468    /* ef */ { Cache_UNKNOWN, 0 },
    469    /* f0 */ { Cache_UNKNOWN, 0 },
    470    /* f1 */ { Cache_UNKNOWN, 0 },
    471    /* f2 */ { Cache_UNKNOWN, 0 },
    472    /* f3 */ { Cache_UNKNOWN, 0 },
    473    /* f4 */ { Cache_UNKNOWN, 0 },
    474    /* f5 */ { Cache_UNKNOWN, 0 },
    475    /* f6 */ { Cache_UNKNOWN, 0 },
    476    /* f7 */ { Cache_UNKNOWN, 0 },
    477    /* f8 */ { Cache_UNKNOWN, 0 },
    478    /* f9 */ { Cache_UNKNOWN, 0 },
    479    /* fa */ { Cache_UNKNOWN, 0 },
    480    /* fb */ { Cache_UNKNOWN, 0 },
    481    /* fc */ { Cache_UNKNOWN, 0 },
    482    /* fd */ { Cache_UNKNOWN, 0 },
    483    /* fe */ { Cache_UNKNOWN, 0 },
    484    /* ff */ { Cache_UNKNOWN, 0 }
    485 };
    486 
    487 /*
    488 * use the above table to determine the CacheEntryLineSize.
    489 */
    490 static void
    491 getIntelCacheEntryLineSize(unsigned long val, int *level,
    492                           unsigned long *lineSize)
    493 {
    494    CacheType type;
    495 
    496    type = CacheMap[val].type;
    497    /* only interested in data caches */
    498    /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
    499     * this data check has the side effect of rejecting that entry. If
    500     * that wasn't the case, we could have to reject it explicitly */
    501    if (CacheMap[val].lineSize == 0) {
    502        return;
    503    }
    504    /* look at the caches, skip types we aren't interested in.
    505     * if we already have a value for a lower level cache, skip the
    506     * current entry */
    507    if ((type == Cache_L1) || (type == Cache_L1d)) {
    508        *level = 1;
    509        *lineSize = CacheMap[val].lineSize;
    510    } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
    511        *level = 2;
    512        *lineSize = CacheMap[val].lineSize;
    513    } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
    514        *level = 3;
    515        *lineSize = CacheMap[val].lineSize;
    516    }
    517    return;
    518 }
    519 
    520 static void
    521 getIntelRegisterCacheLineSize(unsigned long val,
    522                              int *level, unsigned long *lineSize)
    523 {
    524    getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize);
    525    getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize);
    526    getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize);
    527    getIntelCacheEntryLineSize(val & 0xff, level, lineSize);
    528 }
    529 
    530 /*
    531 * returns '0' if no recognized cache is found, or if the cache
    532 * information is supported by this processor
    533 */
    534 static unsigned long
    535 getIntelCacheLineSize(int cpuidLevel)
    536 {
    537    int level = 4;
    538    unsigned long lineSize = 0;
    539    unsigned long eax, ebx, ecx, edx;
    540    int repeat, count;
    541 
    542    if (cpuidLevel < 2) {
    543        return 0;
    544    }
    545 
    546    /* command '2' of the cpuid is intel's cache info call. Each byte of the
    547     * 4 registers contain a potential descriptor for the cache. The CacheMap
    548     * table maps the cache entry with the processor cache. Register 'al'
    549     * contains a count value that cpuid '2' needs to be called in order to
    550     * find all the cache descriptors. Only registers with the high bit set
    551     * to 'zero' have valid descriptors. This code loops through all the
    552     * required calls to cpuid '2' and passes any valid descriptors it finds
    553     * to the getIntelRegisterCacheLineSize code, which breaks the registers
    554     * down into their component descriptors. In the end the lineSize of the
    555     * lowest level cache data cache is returned. */
    556    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
    557    repeat = eax & 0xf;
    558    for (count = 0; count < repeat; count++) {
    559        if ((eax & 0x80000000) == 0) {
    560            getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
    561        }
    562        if ((ebx & 0x80000000) == 0) {
    563            getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
    564        }
    565        if ((ecx & 0x80000000) == 0) {
    566            getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
    567        }
    568        if ((edx & 0x80000000) == 0) {
    569            getIntelRegisterCacheLineSize(edx, &level, &lineSize);
    570        }
    571        if (count + 1 != repeat) {
    572            freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
    573        }
    574    }
    575    return lineSize;
    576 }
    577 
    578 /*
    579 * returns '0' if the cache info is not supported by this processor.
    580 * This is based on the AMD extended cache commands for cpuid.
    581 * (see "AMD Processor Recognition Application Note" Publication 20734).
    582 * Some other processors use the identical scheme.
    583 * (see "Processor Recognition, Transmeta Corporation").
    584 */
    585 static unsigned long
    586 getOtherCacheLineSize(unsigned long cpuidLevel)
    587 {
    588    unsigned long lineSize = 0;
    589    unsigned long eax, ebx, ecx, edx;
    590 
    591    /* get the Extended CPUID level */
    592    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    593    cpuidLevel = eax;
    594 
    595    if (cpuidLevel >= 0x80000005) {
    596        freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
    597        lineSize = ecx & 0xff; /* line Size, L1 Data Cache */
    598    }
    599    return lineSize;
    600 }
    601 
    602 static const char *const manMap[] = {
    603 #define INTEL 0
    604    "GenuineIntel",
    605 #define AMD 1
    606    "AuthenticAMD",
    607 #define CYRIX 2
    608    "CyrixInstead",
    609 #define CENTAUR 2
    610    "CentaurHauls",
    611 #define NEXGEN 3
    612    "NexGenDriven",
    613 #define TRANSMETA 4
    614    "GenuineTMx86",
    615 #define RISE 5
    616    "RiseRiseRise",
    617 #define UMC 6
    618    "UMC UMC UMC ",
    619 #define SIS 7
    620    "Sis Sis Sis ",
    621 #define NATIONAL 8
    622    "Geode by NSC",
    623 };
    624 
    625 static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]);
    626 
    627 #define MAN_UNKNOWN 9
    628 
    629 #if !defined(AMD_64)
    630 #define SSE2_FLAG (1 << 26)
    631 unsigned long
    632 s_mpi_is_sse2()
    633 {
    634    unsigned long eax, ebx, ecx, edx;
    635 
    636    if (is386() || is486()) {
    637        return 0;
    638    }
    639    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
    640 
    641    /* has no SSE2 extensions */
    642    if (eax == 0) {
    643        return 0;
    644    }
    645 
    646    freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
    647    return (edx & SSE2_FLAG) == SSE2_FLAG;
    648 }
    649 #endif
    650 
    651 unsigned long
    652 s_mpi_getProcessorLineSize()
    653 {
    654    unsigned long eax, ebx, ecx, edx;
    655    PRUint32 cpuid[3];
    656    unsigned long cpuidLevel;
    657    unsigned long cacheLineSize = 0;
    658    int manufacturer = MAN_UNKNOWN;
    659    int i;
    660    char string[13];
    661 
    662 #if !defined(AMD_64)
    663    if (is386()) {
    664        return 0; /* 386 had no cache */
    665    }
    666    if (is486()) {
    667        return 32; /* really? need more info */
    668    }
    669 #endif
    670 
    671    /* Pentium, cpuid command is available */
    672    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
    673    cpuidLevel = eax;
    674    /* string holds the CPU's manufacturer ID string - a twelve
    675     * character ASCII string stored in ebx, edx, ecx, and
    676     * the 32-bit extended feature flags are in edx, ecx.
    677     */
    678    cpuid[0] = ebx;
    679    cpuid[1] = ecx;
    680    cpuid[2] = edx;
    681    memcpy(string, cpuid, sizeof(cpuid));
    682    string[12] = 0;
    683 
    684    manufacturer = MAN_UNKNOWN;
    685    for (i = 0; i < n_manufacturers; i++) {
    686        if (strcmp(manMap[i], string) == 0) {
    687            manufacturer = i;
    688        }
    689    }
    690 
    691    if (manufacturer == INTEL) {
    692        cacheLineSize = getIntelCacheLineSize(cpuidLevel);
    693    } else {
    694        cacheLineSize = getOtherCacheLineSize(cpuidLevel);
    695    }
    696    /* doesn't support cache info based on cpuid. This means
    697     * an old pentium class processor, which have cache lines of
    698     * 32. If we learn differently, we can use a switch based on
    699     * the Manufacturer id  */
    700    if (cacheLineSize == 0) {
    701        cacheLineSize = 32;
    702    }
    703    return cacheLineSize;
    704 }
    705 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
    706 #endif
    707 
    708 #if defined(__ppc64__)
    709 /*
    710 *  Sigh, The PPC has some really nice features to help us determine cache
    711 *  size, since it had lots of direct control functions to do so. The POWER
    712 *  processor even has an instruction to do this, but it was dropped in
    713 *  PowerPC. Unfortunately most of them are not available in user mode.
    714 *
    715 *  The dcbz function would be a great way to determine cache line size except
    716 *  1) it only works on write-back memory (it throws an exception otherwise),
    717 *  and 2) because so many mac programs 'knew' the processor cache size was
    718 *  32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
    719 *  G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
    720 *  these programs happy. dcbzl work if 64 bit instructions are supported.
    721 *  If you know 64 bit instructions are supported, and that stack is
    722 *  write-back, you can use this code.
    723 */
    724 #include "memory.h"
    725 
    726 /* clear the cache line that contains 'array' */
    727 static inline void
    728 dcbzl(char *array)
    729 {
    730    __asm__("dcbzl %0, %1"
    731            : /*no result*/
    732            : "b%"(array), "r"(0)
    733            : "memory");
    734 }
    735 
    736 #define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1)))
    737 
    738 #define PPC_MAX_LINE_SIZE 256
    739 unsigned long
    740 s_mpi_getProcessorLineSize()
    741 {
    742    char testArray[2 * PPC_MAX_LINE_SIZE + 1];
    743    char *test;
    744    int i;
    745 
    746    /* align the array on a maximum line size boundary, so we
    747     * know we are starting to clear from the first address */
    748    test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);
    749    /* set all the values to 1's */
    750    memset(test, 0xff, PPC_MAX_LINE_SIZE);
    751    /* clear one cache block starting at 'test' */
    752    dcbzl(test);
    753 
    754    /* find the size of the cleared area, that's our block size */
    755    for (i = PPC_MAX_LINE_SIZE; i != 0; i = i / 2) {
    756        if (test[i - 1] == 0) {
    757            return i;
    758        }
    759    }
    760    return 0;
    761 }
    762 
    763 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
    764 #endif
    765 
    766 /*
    767 * put other processor and platform specific cache code here
    768 * return the smallest cache line size in bytes on the processor
    769 * (usually the L1 cache). If the OS has a call, this would be
    770 * a great place to put it.
    771 *
    772 * If there is no cache, return 0;
    773 *
    774 * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
    775 * below aren't compiled.
    776 *
    777 */
    778 
    779 /* If no way to get the processor cache line size has been defined, assume
    780 * it's 32 bytes (most common value, does not significantly impact performance)
    781 */
    782 #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
    783 unsigned long
    784 s_mpi_getProcessorLineSize()
    785 {
    786    return 32;
    787 }
    788 #endif