tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

mips_macro.h (12476B)


      1 // Copyright 2014 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // MIPS common macros
     11 
     12 #ifndef WEBP_DSP_MIPS_MACRO_H_
     13 #define WEBP_DSP_MIPS_MACRO_H_
     14 
     15 #if defined(__GNUC__) && defined(__ANDROID__) && LOCAL_GCC_VERSION == 0x409
        // Flags the GCC + Android + LOCAL_GCC_VERSION == 0x409 configuration. This
        // header only defines the flag; the code that tests WORK_AROUND_GCC lives
        // elsewhere.
        // NOTE(review): 0x409 presumably encodes GCC 4.9 -- confirm where
        // LOCAL_GCC_VERSION is defined.
     16 #define WORK_AROUND_GCC
     17 #endif
     18 
     19 #define STR(s) #s  // stringifies the argument exactly as written
     20 #define XSTR(s) STR(s)  // macro-expands the argument first, then stringifies
     21 
     22 // Packed-halfword add and subtract of the same two inputs (addq.ph, subq.ph):
     23 //   O0[31..16 | 15..0] = I0[31..16 | 15..0] + I1[31..16 | 15..0]
     24 //   O1[31..16 | 15..0] = I0[31..16 | 15..0] - I1[31..16 | 15..0]
     25 // O - output, I - input (macro doesn't change it; O0 must differ from I0/I1,
        //     which subq.ph reads after addq.ph has written O0)
        // Every argument is the name of an asm operand and is emitted as %[name].
     26 #define ADD_SUB_HALVES(O0, O1,                                                 \
     27                       I0, I1)                                                 \
     28  "addq.ph          %[" #O0 "],   %[" #I0 "],  %[" #I1 "]           \n\t"      \
     29  "subq.ph          %[" #O1 "],   %[" #I0 "],  %[" #I1 "]           \n\t"
     30 
     31 // Two "lh" loads relative to the asm operand named "in" (the name is fixed):
     32 //   O0 = lh I0(in),  O1 = lh I1(in)
     33 // O - output; I[0/1] - offset in bytes, pasted into the template as written
     34 #define LOAD_IN_X2(O0, O1,                                                     \
     35                   I0, I1)                                                     \
     36  "lh               %[" #O0 "],   " #I0 "(%[in])                  \n\t"        \
     37  "lh               %[" #O1 "],   " #I1 "(%[in])                  \n\t"
     38 
     39 // Four "ulw" loads: O<k> = word at offset I<1+k> + I9 * I<5+k> from %[I0], k = 0..3.
     40 // I0 - location (operand name); I1..I8 - offset terms in bytes, pasted as
        //     written; I9 - common multiplier, macro-expanded before pasting (XSTR)
     41 #define LOAD_WITH_OFFSET_X4(O0, O1, O2, O3,                                    \
     42                            I0, I1, I2, I3, I4, I5, I6, I7, I8, I9)            \
     43  "ulw    %[" #O0 "],    " #I1 "+" XSTR(I9) "*" #I5 "(%[" #I0 "])       \n\t"  \
     44  "ulw    %[" #O1 "],    " #I2 "+" XSTR(I9) "*" #I6 "(%[" #I0 "])       \n\t"  \
     45  "ulw    %[" #O2 "],    " #I3 "+" XSTR(I9) "*" #I7 "(%[" #I0 "])       \n\t"  \
     46  "ulw    %[" #O3 "],    " #I4 "+" XSTR(I9) "*" #I8 "(%[" #I0 "])       \n\t"
     47 
     48 
     49 // MUL_SHIFT_C1: O = ((I * kC1) >> 16) + I.   MUL_SHIFT_C2: O = (I * kC2) >> 16.
     50 // I - input (macro doesn't change it so for MUL_SHIFT_C1 it should be different from O)
        // O - output. The shift is "sra". kC1 / kC2 are operand names that the
        // enclosing asm statement has to provide.
     51 #define MUL_SHIFT_C1(O, I)                                                     \
     52  "mul              %[" #O "],    %[" #I "],    %[kC1]        \n\t"            \
     53  "sra              %[" #O "],    %[" #O "],    16            \n\t"            \
     54  "addu             %[" #O "],    %[" #O "],    %[" #I "]     \n\t"
     55 #define MUL_SHIFT_C2(O, I) \
     56  "mul              %[" #O "],    %[" #I "],    %[kC2]        \n\t"            \
     57  "sra              %[" #O "],    %[" #O "],    16            \n\t"
     58 
     59 // In-place variant of MUL_SHIFT_C1: IO = ((IO * kC1) >> 16) + IO. The product
     60 // and its shifted value are held in TMP (overwritten), so TMP must differ from IO.
     61 #define MUL_SHIFT_C1_IO(IO, TMP)                                               \
     62  "mul              %[" #TMP "],  %[" #IO  "], %[kC1]     \n\t"                \
     63  "sra              %[" #TMP "],  %[" #TMP "], 16         \n\t"                \
     64  "addu             %[" #IO  "],  %[" #TMP "], %[" #IO "] \n\t"
     65 
     66 // O(2k) = MUL_SHIFT_C2(I(k)) and O(2k+1) = MUL_SHIFT_C1(I(k)) for k = 0..3,
     67 // followed by IO0 += I4, IO1 += I5, IO2 -= I6, IO3 -= I7 (addu / subu).
     68 // O - output, IO - input/output, I - input (macro doesn't change it)
     69 #define MUL_SHIFT_SUM(O0, O1, O2, O3, O4, O5, O6, O7,                          \
     70                      IO0, IO1, IO2, IO3,                                      \
     71                      I0, I1, I2, I3, I4, I5, I6, I7)                          \
     72  MUL_SHIFT_C2(O0, I0)                                                         \
     73  MUL_SHIFT_C1(O1, I0)                                                         \
     74  MUL_SHIFT_C2(O2, I1)                                                         \
     75  MUL_SHIFT_C1(O3, I1)                                                         \
     76  MUL_SHIFT_C2(O4, I2)                                                         \
     77  MUL_SHIFT_C1(O5, I2)                                                         \
     78  MUL_SHIFT_C2(O6, I3)                                                         \
     79  MUL_SHIFT_C1(O7, I3)                                                         \
     80  "addu             %[" #IO0 "],  %[" #IO0 "],  %[" #I4 "]    \n\t"            \
     81  "addu             %[" #IO1 "],  %[" #IO1 "],  %[" #I5 "]    \n\t"            \
     82  "subu             %[" #IO2 "],  %[" #IO2 "],  %[" #I6 "]    \n\t"            \
     83  "subu             %[" #IO3 "],  %[" #IO3 "],  %[" #I7 "]    \n\t"
     84 
     85 // Two "ins O, I, 16, 16": O<k>[31..16] = I<k>[15..0], O<k>[15..0] is kept (k = 0, 1).
     86 // O - output (its lower half is also read); I - input (macro doesn't change it)
     87 #define INSERT_HALF_X2(O0, O1,                                                 \
     88                       I0, I1)                                                 \
     89  "ins              %[" #O0 "],   %[" #I0 "], 16,    16           \n\t"        \
     90  "ins              %[" #O1 "],   %[" #I1 "], 16,    16           \n\t"
     91 
     92 // Four "sra" shifts by 16: O<k> = I<k> >> 16 for k = 0..3.
     93 // O - output; I - input (macro doesn't change it)
     94 #define SRA_16(O0, O1, O2, O3,                                                 \
     95               I0, I1, I2, I3)                                                 \
     96  "sra              %[" #O0 "],  %[" #I0 "],  16                  \n\t"        \
     97  "sra              %[" #O1 "],  %[" #I1 "],  16                  \n\t"        \
     98  "sra              %[" #O2 "],  %[" #I2 "],  16                  \n\t"        \
     99  "sra              %[" #O3 "],  %[" #I3 "],  16                  \n\t"
    100 
    101 // For k = 0..3, on both 16-bit halves [31..16 | 15..0] of each register:
    102 //   O(2k)   = (I(k) + I(k+4)) >> 3      (addq.ph, then shra.ph)
    103 //   O(2k+1) = (I(k) - I(k+4)) >> 3      (subq.ph, then shra.ph)
    104 // All eight sums/differences are emitted first, then the eight shifts in place.
    105 // O - output
    106 // I - input (macro doesn't change it)
    107 #define SHIFT_R_SUM_X2(O0, O1, O2, O3, O4, O5, O6, O7,                         \
    108                       I0, I1, I2, I3, I4, I5, I6, I7)                         \
    109  "addq.ph          %[" #O0 "],   %[" #I0 "],   %[" #I4 "]    \n\t"            \
    110  "subq.ph          %[" #O1 "],   %[" #I0 "],   %[" #I4 "]    \n\t"            \
    111  "addq.ph          %[" #O2 "],   %[" #I1 "],   %[" #I5 "]    \n\t"            \
    112  "subq.ph          %[" #O3 "],   %[" #I1 "],   %[" #I5 "]    \n\t"            \
    113  "addq.ph          %[" #O4 "],   %[" #I2 "],   %[" #I6 "]    \n\t"            \
    114  "subq.ph          %[" #O5 "],   %[" #I2 "],   %[" #I6 "]    \n\t"            \
    115  "addq.ph          %[" #O6 "],   %[" #I3 "],   %[" #I7 "]    \n\t"            \
    116  "subq.ph          %[" #O7 "],   %[" #I3 "],   %[" #I7 "]    \n\t"            \
    117  "shra.ph          %[" #O0 "],   %[" #O0 "],   3             \n\t"            \
    118  "shra.ph          %[" #O1 "],   %[" #O1 "],   3             \n\t"            \
    119  "shra.ph          %[" #O2 "],   %[" #O2 "],   3             \n\t"            \
    120  "shra.ph          %[" #O3 "],   %[" #O3 "],   3             \n\t"            \
    121  "shra.ph          %[" #O4 "],   %[" #O4 "],   3             \n\t"            \
    122  "shra.ph          %[" #O5 "],   %[" #O5 "],   3             \n\t"            \
    123  "shra.ph          %[" #O6 "],   %[" #O6 "],   3             \n\t"            \
    124  "shra.ph          %[" #O7 "],   %[" #O7 "],   3             \n\t"
    125 
    126 // For k = 0..3: "precrq.ph.w O<k>, I<k>, IO<k>", then "ins IO<k>, I<k>, 16, 16"
    127 //   O<k>  = I<k>[31..16] | IO<k>[31..16]    (upper halves of both registers)
    128 //   IO<k> = I<k>[15..0]  | IO<k>[15..0]     (lower halves of both registers;
    129 //                                            ins takes the low 16 bits of I<k>)
    130 // O - output
    131 // IO - input/output (read by precrq.ph.w before ins overwrites its upper half)
    132 // I - input (macro doesn't change it)
    133 #define PACK_2_HALVES_TO_WORD(O0, O1, O2, O3,                                  \
    134                              IO0, IO1, IO2, IO3,                              \
    135                              I0, I1, I2, I3)                                  \
    136  "precrq.ph.w      %[" #O0 "],    %[" #I0 "],  %[" #IO0 "]       \n\t"        \
    137  "precrq.ph.w      %[" #O1 "],    %[" #I1 "],  %[" #IO1 "]       \n\t"        \
    138  "ins              %[" #IO0 "],   %[" #I0 "],  16,    16         \n\t"        \
    139  "ins              %[" #IO1 "],   %[" #I1 "],  16,    16         \n\t"        \
    140  "precrq.ph.w      %[" #O2 "],    %[" #I2 "],  %[" #IO2 "]       \n\t"        \
    141  "precrq.ph.w      %[" #O3 "],    %[" #I3 "],  %[" #IO3 "]       \n\t"        \
    142  "ins              %[" #IO2 "],   %[" #I2 "],  16,    16         \n\t"        \
    143  "ins              %[" #IO3 "],   %[" #I3 "],  16,    16         \n\t"
    144 
    145 // For k = 0..3, each byte of I<k> is zero-extended to a 16-bit half
    146 // (layout shown byte by byte, most significant byte first):
    147 //   preceu.ph.qbr: O(2k)   = 0 | I<k>[15..8]  | 0 | I<k>[7..0]
    148 //   preceu.ph.qbl: O(2k+1) = 0 | I<k>[31..24] | 0 | I<k>[23..16]
    149 // O - output
    150 // I - input (macro doesn't change it)
    151 #define CONVERT_2_BYTES_TO_HALF(O0, O1, O2, O3, O4, O5, O6, O7,                \
    152                                I0, I1, I2, I3)                                \
    153  "preceu.ph.qbr    %[" #O0 "],   %[" #I0 "]                      \n\t"        \
    154  "preceu.ph.qbl    %[" #O1 "],   %[" #I0 "]                      \n\t"        \
    155  "preceu.ph.qbr    %[" #O2 "],   %[" #I1 "]                      \n\t"        \
    156  "preceu.ph.qbl    %[" #O3 "],   %[" #I1 "]                      \n\t"        \
    157  "preceu.ph.qbr    %[" #O4 "],   %[" #I2 "]                      \n\t"        \
    158  "preceu.ph.qbl    %[" #O5 "],   %[" #I2 "]                      \n\t"        \
    159  "preceu.ph.qbr    %[" #O6 "],   %[" #I3 "]                      \n\t"        \
    160  "preceu.ph.qbl    %[" #O7 "],   %[" #I3 "]                      \n\t"
    161 
    162 // 1. IO<k>[31..16 | 15..0] += I<k>[31..16 | 15..0]            (addq.ph, k = 0..7)
    163 // 2. IO<k>[31..16 | 15..0] = IO<k>[31..16 <<(s) 7 | 15..0 <<(s) 7]  (shll_s.ph)
    164 // 3. precrqu_s.qb.ph IO<j>, IO<j+1>, IO<j> for j = 0, 2, 4, 6:
    165 //      IO<j> = IO<j+1>[31..24] | IO<j+1>[15..8] | IO<j>[31..24] | IO<j>[15..8]
    166 //    NOTE(review): the instruction also saturates; check exact bit ranges in the DSP ASE manual.
    167 // 4. usw stores IO0, IO2, IO4, IO6 at offsets I13*I9, I13*I10, I13*I11, I13*I12 from %[I8]
    168 // IO - input/output; I0..I7 - input (macro doesn't change it)
    169 // I8 - operand name of the store base; I9..I12 - pasted as written; I13 - expanded via XSTR
    170 #define STORE_SAT_SUM_X2(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7,               \
    171                         I0, I1, I2, I3, I4, I5, I6, I7,                       \
    172                         I8, I9, I10, I11, I12, I13)                           \
    173  "addq.ph          %[" #IO0 "],  %[" #IO0 "],  %[" #I0 "]          \n\t"      \
    174  "addq.ph          %[" #IO1 "],  %[" #IO1 "],  %[" #I1 "]          \n\t"      \
    175  "addq.ph          %[" #IO2 "],  %[" #IO2 "],  %[" #I2 "]          \n\t"      \
    176  "addq.ph          %[" #IO3 "],  %[" #IO3 "],  %[" #I3 "]          \n\t"      \
    177  "addq.ph          %[" #IO4 "],  %[" #IO4 "],  %[" #I4 "]          \n\t"      \
    178  "addq.ph          %[" #IO5 "],  %[" #IO5 "],  %[" #I5 "]          \n\t"      \
    179  "addq.ph          %[" #IO6 "],  %[" #IO6 "],  %[" #I6 "]          \n\t"      \
    180  "addq.ph          %[" #IO7 "],  %[" #IO7 "],  %[" #I7 "]          \n\t"      \
    181  "shll_s.ph        %[" #IO0 "],  %[" #IO0 "],  7                   \n\t"      \
    182  "shll_s.ph        %[" #IO1 "],  %[" #IO1 "],  7                   \n\t"      \
    183  "shll_s.ph        %[" #IO2 "],  %[" #IO2 "],  7                   \n\t"      \
    184  "shll_s.ph        %[" #IO3 "],  %[" #IO3 "],  7                   \n\t"      \
    185  "shll_s.ph        %[" #IO4 "],  %[" #IO4 "],  7                   \n\t"      \
    186  "shll_s.ph        %[" #IO5 "],  %[" #IO5 "],  7                   \n\t"      \
    187  "shll_s.ph        %[" #IO6 "],  %[" #IO6 "],  7                   \n\t"      \
    188  "shll_s.ph        %[" #IO7 "],  %[" #IO7 "],  7                   \n\t"      \
    189  "precrqu_s.qb.ph  %[" #IO0 "],  %[" #IO1 "],  %[" #IO0 "]         \n\t"      \
    190  "precrqu_s.qb.ph  %[" #IO2 "],  %[" #IO3 "],  %[" #IO2 "]         \n\t"      \
    191  "precrqu_s.qb.ph  %[" #IO4 "],  %[" #IO5 "],  %[" #IO4 "]         \n\t"      \
    192  "precrqu_s.qb.ph  %[" #IO6 "],  %[" #IO7 "],  %[" #IO6 "]         \n\t"      \
    193  "usw              %[" #IO0 "],  " XSTR(I13) "*" #I9 "(%[" #I8 "])   \n\t"    \
    194  "usw              %[" #IO2 "],  " XSTR(I13) "*" #I10 "(%[" #I8 "])  \n\t"    \
    195  "usw              %[" #IO4 "],  " XSTR(I13) "*" #I11 "(%[" #I8 "])  \n\t"    \
    196  "usw              %[" #IO6 "],  " XSTR(I13) "*" #I12 "(%[" #I8 "])  \n\t"
    197 
        // Output-operand section for an asm statement: starts with the ':' separator
        // and binds the C variables temp1..temp10 as "=&r" (early-clobber register)
        // outputs, each under an operand name spelled like the variable. The
        // variables must be declared where the macro is used.
    198 #define OUTPUT_EARLY_CLOBBER_REGS_10()                                         \
    199  : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),             \
    200    [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),             \
    201    [temp7]"=&r"(temp7), [temp8]"=&r"(temp8), [temp9]"=&r"(temp9),             \
    202    [temp10]"=&r"(temp10)
    203 
        // OUTPUT_EARLY_CLOBBER_REGS_10() extended with temp11..temp18, bound the same
        // way ("=&r" outputs named after the variables), for asm statements that use
        // eighteen temporaries.
    204 #define OUTPUT_EARLY_CLOBBER_REGS_18()                                         \
    205  OUTPUT_EARLY_CLOBBER_REGS_10(),                                              \
    206  [temp11]"=&r"(temp11), [temp12]"=&r"(temp12), [temp13]"=&r"(temp13),         \
    207  [temp14]"=&r"(temp14), [temp15]"=&r"(temp15), [temp16]"=&r"(temp16),         \
    208  [temp17]"=&r"(temp17), [temp18]"=&r"(temp18)
    209 
    210 #endif  // WEBP_DSP_MIPS_MACRO_H_