tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

mem_rvv.h (10357B)


      1 /*
      2 * Copyright (c) 2025, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #ifndef AOM_AOM_DSP_RISCV_MEM_RVV_H_
     13 #define AOM_AOM_DSP_RISCV_MEM_RVV_H_
     14 
     15 #include <riscv_vector.h>
     16 
     17 static inline void load_s16_4x5(const int16_t *s, int p, vint16mf2_t *const s0,
     18                                vint16mf2_t *const s1, vint16mf2_t *const s2,
     19                                vint16mf2_t *const s3, vint16mf2_t *const s4,
     20                                size_t vl) {
     21  *s0 = __riscv_vle16_v_i16mf2(s, vl);
     22  s += p;
     23  *s1 = __riscv_vle16_v_i16mf2(s, vl);
     24  s += p;
     25  *s2 = __riscv_vle16_v_i16mf2(s, vl);
     26  s += p;
     27  *s3 = __riscv_vle16_v_i16mf2(s, vl);
     28  s += p;
     29  *s4 = __riscv_vle16_v_i16mf2(s, vl);
     30 }
     31 
     32 static inline void load_s16_4x4(const int16_t *s, int p, vint16mf2_t *const s0,
     33                                vint16mf2_t *const s1, vint16mf2_t *const s2,
     34                                vint16mf2_t *const s3, size_t vl) {
     35  *s0 = __riscv_vle16_v_i16mf2(s, vl);
     36  s += p;
     37  *s1 = __riscv_vle16_v_i16mf2(s, vl);
     38  s += p;
     39  *s2 = __riscv_vle16_v_i16mf2(s, vl);
     40  s += p;
     41  *s3 = __riscv_vle16_v_i16mf2(s, vl);
     42 }
     43 
     44 static inline void load_s16_8x5(const int16_t *s, int p, vint16m1_t *const s0,
     45                                vint16m1_t *const s1, vint16m1_t *const s2,
     46                                vint16m1_t *const s3, vint16m1_t *const s4,
     47                                size_t vl) {
     48  *s0 = __riscv_vle16_v_i16m1(s, vl);
     49  s += p;
     50  *s1 = __riscv_vle16_v_i16m1(s, vl);
     51  s += p;
     52  *s2 = __riscv_vle16_v_i16m1(s, vl);
     53  s += p;
     54  *s3 = __riscv_vle16_v_i16m1(s, vl);
     55  s += p;
     56  *s4 = __riscv_vle16_v_i16m1(s, vl);
     57 }
     58 
     59 static inline void load_s16_8x4(const int16_t *s, int p, vint16m1_t *const s0,
     60                                vint16m1_t *const s1, vint16m1_t *const s2,
     61                                vint16m1_t *const s3, size_t vl) {
     62  *s0 = __riscv_vle16_v_i16m1(s, vl);
     63  s += p;
     64  *s1 = __riscv_vle16_v_i16m1(s, vl);
     65  s += p;
     66  *s2 = __riscv_vle16_v_i16m1(s, vl);
     67  s += p;
     68  *s3 = __riscv_vle16_v_i16m1(s, vl);
     69 }
     70 
     71 static inline void store_u16_8x4(uint16_t *s, int p, const vuint16m1_t s0,
     72                                 const vuint16m1_t s1, const vuint16m1_t s2,
     73                                 const vuint16m1_t s3, size_t vl) {
     74  __riscv_vse16_v_u16m1(s, s0, vl);
     75  s += p;
     76  __riscv_vse16_v_u16m1(s, s1, vl);
     77  s += p;
     78  __riscv_vse16_v_u16m1(s, s2, vl);
     79  s += p;
     80  __riscv_vse16_v_u16m1(s, s3, vl);
     81 }
     82 
     83 static inline void load_s16_4x7(const int16_t *s, int p, vint16mf2_t *const s0,
     84                                vint16mf2_t *const s1, vint16mf2_t *const s2,
     85                                vint16mf2_t *const s3, vint16mf2_t *const s4,
     86                                vint16mf2_t *const s5, vint16mf2_t *const s6,
     87                                size_t vl) {
     88  *s0 = __riscv_vle16_v_i16mf2(s, vl);
     89  s += p;
     90  *s1 = __riscv_vle16_v_i16mf2(s, vl);
     91  s += p;
     92  *s2 = __riscv_vle16_v_i16mf2(s, vl);
     93  s += p;
     94  *s3 = __riscv_vle16_v_i16mf2(s, vl);
     95  s += p;
     96  *s4 = __riscv_vle16_v_i16mf2(s, vl);
     97  s += p;
     98  *s5 = __riscv_vle16_v_i16mf2(s, vl);
     99  s += p;
    100  *s6 = __riscv_vle16_v_i16mf2(s, vl);
    101 }
    102 
    103 static inline void load_s16_8x7(const int16_t *s, int p, vint16m1_t *const s0,
    104                                vint16m1_t *const s1, vint16m1_t *const s2,
    105                                vint16m1_t *const s3, vint16m1_t *const s4,
    106                                vint16m1_t *const s5, vint16m1_t *const s6,
    107                                size_t vl) {
    108  *s0 = __riscv_vle16_v_i16m1(s, vl);
    109  s += p;
    110  *s1 = __riscv_vle16_v_i16m1(s, vl);
    111  s += p;
    112  *s2 = __riscv_vle16_v_i16m1(s, vl);
    113  s += p;
    114  *s3 = __riscv_vle16_v_i16m1(s, vl);
    115  s += p;
    116  *s4 = __riscv_vle16_v_i16m1(s, vl);
    117  s += p;
    118  *s5 = __riscv_vle16_v_i16m1(s, vl);
    119  s += p;
    120  *s6 = __riscv_vle16_v_i16m1(s, vl);
    121 }
    122 
    123 static inline void load_s16_4x11(const int16_t *s, int p, vint16mf2_t *const s0,
    124                                 vint16mf2_t *const s1, vint16mf2_t *const s2,
    125                                 vint16mf2_t *const s3, vint16mf2_t *const s4,
    126                                 vint16mf2_t *const s5, vint16mf2_t *const s6,
    127                                 vint16mf2_t *const s7, vint16mf2_t *const s8,
    128                                 vint16mf2_t *const s9, vint16mf2_t *const s10,
    129                                 size_t vl) {
    130  *s0 = __riscv_vle16_v_i16mf2(s, vl);
    131  s += p;
    132  *s1 = __riscv_vle16_v_i16mf2(s, vl);
    133  s += p;
    134  *s2 = __riscv_vle16_v_i16mf2(s, vl);
    135  s += p;
    136  *s3 = __riscv_vle16_v_i16mf2(s, vl);
    137  s += p;
    138  *s4 = __riscv_vle16_v_i16mf2(s, vl);
    139  s += p;
    140  *s5 = __riscv_vle16_v_i16mf2(s, vl);
    141  s += p;
    142  *s6 = __riscv_vle16_v_i16mf2(s, vl);
    143  s += p;
    144  *s7 = __riscv_vle16_v_i16mf2(s, vl);
    145  s += p;
    146  *s8 = __riscv_vle16_v_i16mf2(s, vl);
    147  s += p;
    148  *s9 = __riscv_vle16_v_i16mf2(s, vl);
    149  s += p;
    150  *s10 = __riscv_vle16_v_i16mf2(s, vl);
    151 }
    152 
    153 static inline void load_s16_8x11(const int16_t *s, int p, vint16m1_t *const s0,
    154                                 vint16m1_t *const s1, vint16m1_t *const s2,
    155                                 vint16m1_t *const s3, vint16m1_t *const s4,
    156                                 vint16m1_t *const s5, vint16m1_t *const s6,
    157                                 vint16m1_t *const s7, vint16m1_t *const s8,
    158                                 vint16m1_t *const s9, vint16m1_t *const s10,
    159                                 size_t vl) {
    160  *s0 = __riscv_vle16_v_i16m1(s, vl);
    161  s += p;
    162  *s1 = __riscv_vle16_v_i16m1(s, vl);
    163  s += p;
    164  *s2 = __riscv_vle16_v_i16m1(s, vl);
    165  s += p;
    166  *s3 = __riscv_vle16_v_i16m1(s, vl);
    167  s += p;
    168  *s4 = __riscv_vle16_v_i16m1(s, vl);
    169  s += p;
    170  *s5 = __riscv_vle16_v_i16m1(s, vl);
    171  s += p;
    172  *s6 = __riscv_vle16_v_i16m1(s, vl);
    173  s += p;
    174  *s7 = __riscv_vle16_v_i16m1(s, vl);
    175  s += p;
    176  *s8 = __riscv_vle16_v_i16m1(s, vl);
    177  s += p;
    178  *s9 = __riscv_vle16_v_i16m1(s, vl);
    179  s += p;
    180  *s10 = __riscv_vle16_v_i16m1(s, vl);
    181 }
    182 
    183 static inline void load_s16_8x6(const int16_t *s, int p, vint16m1_t *const s0,
    184                                vint16m1_t *const s1, vint16m1_t *const s2,
    185                                vint16m1_t *const s3, vint16m1_t *const s4,
    186                                vint16m1_t *const s5, size_t vl) {
    187  *s0 = __riscv_vle16_v_i16m1(s, vl);
    188  s += p;
    189  *s1 = __riscv_vle16_v_i16m1(s, vl);
    190  s += p;
    191  *s2 = __riscv_vle16_v_i16m1(s, vl);
    192  s += p;
    193  *s3 = __riscv_vle16_v_i16m1(s, vl);
    194  s += p;
    195  *s4 = __riscv_vle16_v_i16m1(s, vl);
    196  s += p;
    197  *s5 = __riscv_vle16_v_i16m1(s, vl);
    198 }
    199 
    200 static inline void load_s16_8x8(const int16_t *s, int p, vint16m1_t *const s0,
    201                                vint16m1_t *const s1, vint16m1_t *const s2,
    202                                vint16m1_t *const s3, vint16m1_t *const s4,
    203                                vint16m1_t *const s5, vint16m1_t *const s6,
    204                                vint16m1_t *const s7, size_t vl) {
    205  *s0 = __riscv_vle16_v_i16m1(s, vl);
    206  s += p;
    207  *s1 = __riscv_vle16_v_i16m1(s, vl);
    208  s += p;
    209  *s2 = __riscv_vle16_v_i16m1(s, vl);
    210  s += p;
    211  *s3 = __riscv_vle16_v_i16m1(s, vl);
    212  s += p;
    213  *s4 = __riscv_vle16_v_i16m1(s, vl);
    214  s += p;
    215  *s5 = __riscv_vle16_v_i16m1(s, vl);
    216  s += p;
    217  *s6 = __riscv_vle16_v_i16m1(s, vl);
    218  s += p;
    219  *s7 = __riscv_vle16_v_i16m1(s, vl);
    220 }
    221 
    222 static inline void load_s16_8x12(const int16_t *s, int p, vint16m1_t *const s0,
    223                                 vint16m1_t *const s1, vint16m1_t *const s2,
    224                                 vint16m1_t *const s3, vint16m1_t *const s4,
    225                                 vint16m1_t *const s5, vint16m1_t *const s6,
    226                                 vint16m1_t *const s7, vint16m1_t *const s8,
    227                                 vint16m1_t *const s9, vint16m1_t *const s10,
    228                                 vint16m1_t *const s11, size_t vl) {
    229  *s0 = __riscv_vle16_v_i16m1(s, vl);
    230  s += p;
    231  *s1 = __riscv_vle16_v_i16m1(s, vl);
    232  s += p;
    233  *s2 = __riscv_vle16_v_i16m1(s, vl);
    234  s += p;
    235  *s3 = __riscv_vle16_v_i16m1(s, vl);
    236  s += p;
    237  *s4 = __riscv_vle16_v_i16m1(s, vl);
    238  s += p;
    239  *s5 = __riscv_vle16_v_i16m1(s, vl);
    240  s += p;
    241  *s6 = __riscv_vle16_v_i16m1(s, vl);
    242  s += p;
    243  *s7 = __riscv_vle16_v_i16m1(s, vl);
    244  s += p;
    245  *s8 = __riscv_vle16_v_i16m1(s, vl);
    246  s += p;
    247  *s9 = __riscv_vle16_v_i16m1(s, vl);
    248  s += p;
    249  *s10 = __riscv_vle16_v_i16m1(s, vl);
    250  s += p;
    251  *s11 = __riscv_vle16_v_i16m1(s, vl);
    252 }
    253 
    254 static inline void load_s16_4x12(const int16_t *s, int p, vint16mf2_t *const s0,
    255                                 vint16mf2_t *const s1, vint16mf2_t *const s2,
    256                                 vint16mf2_t *const s3, vint16mf2_t *const s4,
    257                                 vint16mf2_t *const s5, vint16mf2_t *const s6,
    258                                 vint16mf2_t *const s7, vint16mf2_t *const s8,
    259                                 vint16mf2_t *const s9, vint16mf2_t *const s10,
    260                                 vint16mf2_t *const s11, size_t vl) {
    261  *s0 = __riscv_vle16_v_i16mf2(s, vl);
    262  s += p;
    263  *s1 = __riscv_vle16_v_i16mf2(s, vl);
    264  s += p;
    265  *s2 = __riscv_vle16_v_i16mf2(s, vl);
    266  s += p;
    267  *s3 = __riscv_vle16_v_i16mf2(s, vl);
    268  s += p;
    269  *s4 = __riscv_vle16_v_i16mf2(s, vl);
    270  s += p;
    271  *s5 = __riscv_vle16_v_i16mf2(s, vl);
    272  s += p;
    273  *s6 = __riscv_vle16_v_i16mf2(s, vl);
    274  s += p;
    275  *s7 = __riscv_vle16_v_i16mf2(s, vl);
    276  s += p;
    277  *s8 = __riscv_vle16_v_i16mf2(s, vl);
    278  s += p;
    279  *s9 = __riscv_vle16_v_i16mf2(s, vl);
    280  s += p;
    281  *s10 = __riscv_vle16_v_i16mf2(s, vl);
    282  s += p;
    283  *s11 = __riscv_vle16_v_i16mf2(s, vl);
    284 }
    285 
    286 static inline void store_u16_4x4(uint16_t *s, int p, const vuint16mf2_t s0,
    287                                 const vuint16mf2_t s1, const vuint16mf2_t s2,
    288                                 const vuint16mf2_t s3, size_t vl) {
    289  __riscv_vse16_v_u16mf2(s, s0, vl);
    290  s += p;
    291  __riscv_vse16_v_u16mf2(s, s1, vl);
    292  s += p;
    293  __riscv_vse16_v_u16mf2(s, s2, vl);
    294  s += p;
    295  __riscv_vse16_v_u16mf2(s, s3, vl);
    296 }
    297 
    298 #endif  // AOM_AOM_DSP_RISCV_MEM_RVV_H_