IntegerGemmIntrinsic.h (19336B)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

#ifndef intgemm_IntegerGemmIntrinsic_h
#define intgemm_IntegerGemmIntrinsic_h

#include <stdint.h>

namespace js {
namespace wasm {
class Instance;
}

namespace intgemm {

/* Interface for integer matrix multiplication followed by addition of bias.
 *
 * C = A * B + Bias
 *
 * Input matrix A:
 * - A 2-D matrix that typically represents activations as floating point
 *   values
 * - no. of rows should be a positive integer
 * - no. of columns should be a positive integral multiple of 64
 * - is represented as an array (contiguous memory locations) in row-major
 *   format
 *
 * Input matrix B:
 * - A 2-D matrix that typically represents fixed model parameters as
 *   floating point values
 * - no. of rows should be:
 *   -- equal to the no. of columns of Input matrix A
 *   -- a positive integral multiple of 64
 * - no. of columns should be a positive integral multiple of 8
 * - is represented as an array (contiguous memory locations) in row-major
 *   format
 *
 * Please note that Input matrix B can also be passed in two other forms:
 * - one that is already a quantized and transposed version of Input matrix B
 * - one that is already a transposed version of Input matrix B
 *
 * Input Bias:
 * - an array (contiguous memory locations) that represents the bias
 * - size of the array should be equal to the no. of columns of Input matrix B
 *
 * Output matrix C:
 * - a 2-D matrix that represents the result (= A * B + Bias)
 * - no. of rows = no. of rows of Input matrix A
 * - no. of columns = no. of columns of Input matrix B (in untransposed form)
 * - is represented as an array (contiguous memory locations) in row-major
 *   format
 *
 * Please note that most of the functions in this interface might have
 * architecture-specific implementations.
 *
 * Conventions followed for the interface:
 * - Unless explicitly mentioned, Input matrix B refers to an unquantized
 *   (i.e. float values) and non-transposed version
 * - no. of rows of Input matrix A = `rowsA`
 * - no. of columns of Input matrix A (`colsA`) = no. of rows of Input
 *   matrix B (`rowsB`) = `width`
 * - no. of columns of Input matrix B = `colsB`
 */

/* Prepare B for the Matrix Multiply function from Input matrix B.
 *
 * Quantization is performed on the input.
 * The final prepared B is in a CPU-dependent format and can be used as an
 * input to the matrix multiply function (`int8_multiply_and_add_bias`).
 *
 * Please note that this interface might have an architecture-specific
 * implementation.
 *
 * @param[in]  inputMatrixB   An array representing the Input matrix B in
 *                            row-major format.
 *                            Size of the array = `rowsB` * `colsB`.
 *                            Shape of the matrix: (`rowsB`, `colsB`)
 * @param[in]  scale          The scaling factor (for quantization)
 * @param[in]  zeroPoint      The zero point (for quantization)
 * @param[in]  rowsB          No. of rows of Input matrix B. It should be
 *                            a positive integer and a multiple of 64.
 * @param[in]  colsB          No. of columns of Input matrix B. It should
 *                            be a positive integer and a multiple of 8.
 * @param[out] outputMatrixB  An array representing the prepared B matrix.
 *                            Size of the array = `rowsB` * `colsB`.
 *
 * This function implements the intrinsic:
 *   int8_prepare_b(inputMatrixB: i32, scale: f32, zeroPoint: f32, rowsB: i32,
 *                  colsB: i32, outputMatrixB: i32)
 * which implements the function:
 *   int8_prepare_b(const float* inputMatrixB, float scale, float zeroPoint,
 *                  uint32_t rowsB, uint32_t colsB, int8_t* outputMatrixB)
 */
int32_t IntrI8PrepareB(wasm::Instance* instance, uint32_t inputMatrixB,
                       float scale, float zeroPoint, uint32_t rowsB,
                       uint32_t colsB, uint32_t outputMatrixB,
                       uint8_t* memBase);

/* Prepare B for the Matrix Multiply function from a transposed version of
 * Input matrix B.
 *
 * Quantization is performed on the floating point values of the input.
 * The final prepared B is in a CPU-dependent format and can be used as an
 * input to the matrix multiply function (`int8_multiply_and_add_bias`).
 *
 * Please note that this interface might have an architecture-specific
 * implementation.
 *
 * @param[in]  inputMatrixBTransposed  An array representing the transposed
 *                                     version of Input matrix B.
 *                                     It is in column-major format.
 *                                     Size of the array = `rowsB` * `colsB`.
 *                                     Shape of the matrix: (`colsB`, `rowsB`)
 * @param[in]  scale                   The scaling factor (for quantization)
 * @param[in]  zeroPoint               The zero point (for quantization)
 * @param[in]  rowsB                   No. of rows of Input matrix B. It
 *                                     should be a positive integer and a
 *                                     multiple of 64.
 * @param[in]  colsB                   No. of columns of Input matrix B. It
 *                                     should be a positive integer and a
 *                                     multiple of 8.
 * @param[out] outputMatrixB           An array representing the prepared B
 *                                     matrix.
 *                                     Size of the array = `rowsB` * `colsB`.
 *
 * This function implements the intrinsic:
 *   int8_prepare_b_from_transposed(inputMatrixBTransposed: i32, scale: f32,
 *                                  zeroPoint: f32, rowsB: i32, colsB: i32,
 *                                  outputMatrixB: i32)
 * which implements the function:
 *   int8_prepare_b_from_transposed(const float* inputMatrixBTransposed,
 *                                  float scale, float zeroPoint,
 *                                  uint32_t rowsB, uint32_t colsB,
 *                                  int8_t* outputMatrixB)
 */
int32_t IntrI8PrepareBFromTransposed(wasm::Instance* instance,
                                     uint32_t inputMatrixBTransposed,
                                     float scale, float zeroPoint,
                                     uint32_t rowsB, uint32_t colsB,
                                     uint32_t outputMatrixB, uint8_t* memBase);

/* Prepare B for the Matrix Multiply function from a quantized and transposed
 * version of Input matrix B which is also in a CPU-independent format.
 *
 * The final prepared B is in a CPU-dependent format and can be used as an
 * input to the matrix multiply function (`int8_multiply_and_add_bias`).
 *
 * This function is useful when using quantized models that are stored on
 * disk in a CPU-independent format.
 *
 * @param[in]  inputMatrixBQuantizedTransposed  An array representing the
 *                                              quantized and transposed
 *                                              version of Input matrix B.
 *                                              It is in column-major format.
 *                                              Size of the array =
 *                                              `rowsB` * `colsB`.
 *                                              Shape of the matrix:
 *                                              (`colsB`, `rowsB`)
 * @param[in]  rowsB                            No. of rows of Input matrix B.
 *                                              Should be a positive integer
 *                                              and a multiple of 64.
 * @param[in]  colsB                            No. of columns of Input matrix
 *                                              B. Should be a positive
 *                                              integer and a multiple of 8.
 * @param[out] outputMatrixB                    An array representing the
 *                                              prepared B matrix.
 *                                              Size of the array =
 *                                              `rowsB` * `colsB`.
 *
 * This function implements the intrinsic:
 *   int8_prepare_b_from_quantized_transposed(
 *       inputMatrixBQuantizedTransposed: i32, rowsB: i32, colsB: i32,
 *       outputMatrixB: i32)
 * which implements the function:
 *   int8_prepare_b_from_quantized_transposed(
 *       const int8_t* inputMatrixBQuantizedTransposed, uint32_t rowsB,
 *       uint32_t colsB, int8_t* outputMatrixB)
 */
int32_t IntrI8PrepareBFromQuantizedTransposed(
    wasm::Instance* instance, uint32_t inputMatrixBQuantizedTransposed,
    uint32_t rowsB, uint32_t colsB, uint32_t outputMatrixB, uint8_t* memBase);

/* Prepare A for the Matrix Multiply function from Input matrix A.
 *
 * It performs quantization on the floating point values of the input.
 * The final prepared A might be architecture dependent, e.g. on some
 * architectures like x86 it might be unsigned (achieved by adding 127 to the
 * quantized values) while on others like Arm it might be signed. The final
 * prepared A can be used as an input to the matrix multiply function
 * (`int8_multiply_and_add_bias`).
 *
 * Please note that this interface might have an architecture-specific
 * implementation.
 *
 * @param[in]  inputMatrixA   An array representing the Input matrix A in
 *                            row-major format.
 *                            Size of the array = `rowsA` * `colsA`.
 *                            Shape of the matrix: (`rowsA`, `colsA`)
 * @param[in]  scale          The scaling factor (for quantization)
 * @param[in]  zeroPoint      The zero point (for quantization)
 * @param[in]  rowsA          No. of rows of Input matrix A. It should be a
 *                            positive integer.
 * @param[in]  colsA          No. of columns of Input matrix A. It should be
 *                            a positive integer and a multiple of 64.
 * @param[out] outputMatrixA  An array representing the prepared A matrix.
 *                            Size of the array = `rowsA` * `colsA`.
 *
 * This function implements the intrinsic:
 *   int8_prepare_a(inputMatrixA: i32, scale: f32, zeroPoint: f32, rowsA: i32,
 *                  colsA: i32, outputMatrixA: i32)
 * which implements the function:
 *   int8_prepare_a(const float* inputMatrixA, float scale, float zeroPoint,
 *                  uint32_t rowsA, uint32_t colsA, int8_t* outputMatrixA)
 */
int32_t IntrI8PrepareA(wasm::Instance* instance, uint32_t inputMatrixA,
                       float scale, float zeroPoint, uint32_t rowsA,
                       uint32_t colsA, uint32_t outputMatrixA,
                       uint8_t* memBase);

/* Prepares bias for the Matrix Multiply function.
 *
 * It uses the prepared B (which must be obtained by using any of the
 * `int8_prepare_b*` functions) and a bias input to prepare the final bias.
 *
 * The final bias can be used as an input to the matrix multiply function
 * (`int8_multiply_and_add_bias`).
 *
 * @param[in]  inputMatrixBPrepared  An array representing the prepared B
 *                                   matrix.
 *                                   Size of the array = `rowsB` * `colsB`.
 * @param[in]  scaleA                The scaling factor (for quantization) of A
 * @param[in]  zeroPointA            The zero point (for quantization) of A
 * @param[in]  scaleB                The scaling factor (for quantization) of B
 * @param[in]  zeroPointB            The zero point (for quantization) of B
 * @param[in]  rowsB                 No. of rows of Input matrix B (unquantized
 *                                   & non-transposed). It should be a positive
 *                                   integer and a multiple of 64.
 * @param[in]  colsB                 No. of columns of Input matrix B
 *                                   (unquantized & non-transposed). It should
 *                                   be a positive integer and a multiple of 8.
 * @param[in]  inputBias             An array representing the input bias.
 *                                   Size of the array = `colsB`
 * @param[out] output                An array representing the final prepared
 *                                   bias.
 *                                   Size of the array = `colsB`
 *
 * This function implements the intrinsic:
 *   int8_prepare_bias(inputMatrixBPrepared: i32, scaleA: f32, zeroPointA: f32,
 *                     scaleB: f32, zeroPointB: f32, rowsB: i32, colsB: i32,
 *                     inputBias: i32, output: i32)
 * which implements the function:
 *   int8_prepare_bias(const int8_t* inputMatrixBPrepared, float scaleA,
 *                     float zeroPointA, float scaleB, float zeroPointB,
 *                     uint32_t rowsB, uint32_t colsB, const float* inputBias,
 *                     float* output)
 */
int32_t IntrI8PrepareBias(wasm::Instance* instance,
                          uint32_t inputMatrixBPrepared, float scaleA,
                          float zeroPointA, float scaleB, float zeroPointB,
                          uint32_t rowsB, uint32_t colsB, uint32_t inputBias,
                          uint32_t output, uint8_t* memBase);

/* Perform multiplication of 2 matrices followed by adding a bias.
 *
 * i.e. Output = inputMatrixAPrepared * inputMatrixBPrepared +
 *               inputBiasPrepared
 *
 * The inputs inputMatrixAPrepared, inputMatrixBPrepared and inputBiasPrepared
 * of this function must be obtained by using `int8_prepare_a`, one of the
 * `int8_prepare_b*` functions and `int8_prepare_bias` respectively.
 *
 * Please note that this interface might have an architecture-specific
 * implementation.
 *
 * @param[in]  inputMatrixAPrepared  An array representing the prepared A
 *                                   matrix. This must be obtained by using
 *                                   the `int8_prepare_a` function.
 *                                   Size of the array = `rowsA` * `width`.
 * @param[in]  scaleA                The scaling factor (for quantization) of A
 * @param[in]  zeroPointA            The zero point (for quantization) of A
 * @param[in]  inputMatrixBPrepared  An array representing the prepared B
 *                                   matrix. This must be obtained by using
 *                                   one of the `int8_prepare_b*` functions.
 *                                   Size of the array = `width` * `colsB`.
 * @param[in]  scaleB                The scaling factor (for quantization) of B
 * @param[in]  zeroPointB            The zero point (for quantization) of B
 * @param[in]  inputBiasPrepared     An array representing the prepared bias.
 *                                   This must be obtained by using the
 *                                   `int8_prepare_bias` function.
 *                                   Size of the array = `colsB`
 * @param[in]  unquantMultiplier     A value that is multiplied with the final
 *                                   unquantization factor that is prepared
 *                                   from `scaleA` and `scaleB`.
 * @param[in]  rowsA                 No. of rows of Input matrix A. It should
 *                                   be a positive integer.
 * @param[in]  width                 No. of columns of Input matrix A (same as
 *                                   the no. of rows of Input matrix B). It
 *                                   should be a positive integer and a
 *                                   multiple of 64.
 * @param[in]  colsB                 No. of columns of Input matrix B. It
 *                                   should be a positive integer and a
 *                                   multiple of 8.
 * @param[out] output                An array representing the result matrix
 *                                   in row-major format.
 *                                   Size of the array = `rowsA` * `colsB`.
 *
 * This function implements the intrinsic:
 *   int8_multiply_and_add_bias(inputMatrixAPrepared: i32, scaleA: f32,
 *                              zeroPointA: f32, inputMatrixBPrepared: i32,
 *                              scaleB: f32, zeroPointB: f32,
 *                              inputBiasPrepared: i32, unquantMultiplier: f32,
 *                              rowsA: i32, width: i32, colsB: i32,
 *                              output: i32)
 * which implements the function:
 *   int8_multiply_and_add_bias(const uint8_t* inputMatrixAPrepared,
 *                              float scaleA, float zeroPointA,
 *                              const int8_t* inputMatrixBPrepared,
 *                              float scaleB, float zeroPointB,
 *                              const float* inputBiasPrepared,
 *                              float unquantMultiplier, uint32_t rowsA,
 *                              uint32_t width, uint32_t colsB, float* output)
 */
int32_t IntrI8MultiplyAndAddBias(wasm::Instance* instance,
                                 uint32_t inputMatrixAPrepared, float scaleA,
                                 float zeroPointA,
                                 uint32_t inputMatrixBPrepared, float scaleB,
                                 float zeroPointB, uint32_t inputBiasPrepared,
                                 float unquantMultiplier, uint32_t rowsA,
                                 uint32_t width, uint32_t colsB,
                                 uint32_t output, uint8_t* memBase);

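To illustrate the semantics of `int8_multiply_and_add_bias` described above, here is a scalar reference sketch, not the actual intrinsic implementation (which uses CPU-dependent packed layouts, SIMD, and the unsigned-A trick mentioned for `int8_prepare_a`). The quantization formula (`q = round(x * scale)` clamped to int8), the unquantization factor (`unquantMultiplier / (scaleA * scaleB)`), and the zero points taken as 0 are assumptions of this sketch, and the multiple-of-64/8 size requirements are not enforced here.

```cpp
#include <cmath>
#include <cstdint>
#include <vector>

// Hypothetical scalar reference of the multiply-and-add-bias semantics:
// quantize A and B to int8, accumulate products in int32, then unquantize
// and add the bias. Illustrative only; layouts and rounding of the real
// intrinsic are architecture-specific.
std::vector<float> referenceMultiplyAndAddBias(
    const std::vector<float>& A, float scaleA, const std::vector<float>& B,
    float scaleB, const std::vector<float>& bias, float unquantMultiplier,
    uint32_t rowsA, uint32_t width, uint32_t colsB) {
  // Assumed quantization: q = round(x * scale), clamped to the int8 range.
  auto quantize = [](float x, float scale) {
    float q = std::nearbyint(x * scale);
    if (q > 127.0f) q = 127.0f;
    if (q < -128.0f) q = -128.0f;
    return static_cast<int8_t>(q);
  };
  const float unquant = unquantMultiplier / (scaleA * scaleB);
  std::vector<float> out(rowsA * colsB);
  for (uint32_t i = 0; i < rowsA; i++) {
    for (uint32_t j = 0; j < colsB; j++) {
      int32_t acc = 0;  // int32 accumulator, as is typical for 8-bit GEMM
      for (uint32_t k = 0; k < width; k++) {
        acc += static_cast<int32_t>(quantize(A[i * width + k], scaleA)) *
               static_cast<int32_t>(quantize(B[k * colsB + j], scaleB));
      }
      // Output = (A * B) unquantized + Bias, per column of B.
      out[i * colsB + j] = static_cast<float>(acc) * unquant + bias[j];
    }
  }
  return out;
}
```

With `scaleA = scaleB = unquantMultiplier = 1` and small integer inputs, the quantization is exact and the result reduces to the float `A * B + Bias` that the interface approximates.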
/* Select a subset of columns of prepared B.
 *
 * The indices of the columns to be selected are specified by an array.
 *
 * @param[in]  inputMatrixBPrepared  An array representing the prepared B
 *                                   matrix. This must be obtained by using
 *                                   one of the `int8_prepare_b*` functions.
 *                                   Size of the array = `rowsB` * `colsB`.
 * @param[in]  rowsB                 No. of rows of Input matrix B. It should
 *                                   be a positive integer and a multiple
 *                                   of 64.
 * @param[in]  colsB                 No. of columns of Input matrix B. It
 *                                   should be a positive integer and a
 *                                   multiple of 8.
 * @param[in]  colIndexList          An array of the column indices to be
 *                                   selected from prepared B. All indices of
 *                                   the array should be valid, i.e.
 *                                   0 <= colIndexList[N] < colsB
 *                                   where N = 0, 1, ..., (`sizeColIndexList` - 1)
 * @param[in]  sizeColIndexList      Size of the `colIndexList` array. It
 *                                   should be a positive integer and a
 *                                   multiple of 8.
 * @param[out] output                An array representing the selected
 *                                   columns of prepared B.
 *                                   Size of the array =
 *                                   `rowsB` * `sizeColIndexList`.
 *
 * This function implements the intrinsic:
 *   int8_select_columns_of_b(inputMatrixBPrepared: i32, rowsB: i32,
 *                            colsB: i32, colIndexList: i32,
 *                            sizeColIndexList: i32, output: i32)
 * which implements the function:
 *   int8_select_columns_of_b(const int8_t* inputMatrixBPrepared,
 *                            uint32_t rowsB, uint32_t colsB,
 *                            const uint32_t* colIndexList,
 *                            const uint32_t sizeColIndexList, int8_t* output)
 */
int32_t IntrI8SelectColumnsOfB(wasm::Instance* instance,
                               uint32_t inputMatrixBPrepared, uint32_t rowsB,
                               uint32_t colsB, uint32_t colIndexList,
                               uint32_t sizeColIndexList, uint32_t output,
                               uint8_t* memBase);

}  // namespace intgemm
}  // namespace js

#endif  // intgemm_IntegerGemmIntrinsic_h