tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ipred.h (15837B)


      1 /*
      2 * Copyright © 2018, VideoLAN and dav1d authors
      3 * All rights reserved.
      4 *
      5 * Redistribution and use in source and binary forms, with or without
      6 * modification, are permitted provided that the following conditions are met:
      7 *
      8 * 1. Redistributions of source code must retain the above copyright notice, this
      9 *    list of conditions and the following disclaimer.
     10 *
     11 * 2. Redistributions in binary form must reproduce the above copyright notice,
     12 *    this list of conditions and the following disclaimer in the documentation
     13 *    and/or other materials provided with the distribution.
     14 *
     15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     22 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25 */
     26 
     27 #include "src/cpu.h"
     28 #include "src/ipred.h"
     29 
     /* Declarations of the NEON intra prediction entry points. Each name declared
      * here is installed into the DSP context by intra_pred_dsp_init_arm() at the
      * end of this file. The decl_*_fn() helpers and BF() are not defined in this
      * file (presumably they come from the included src/ipred.h and the bitdepth
      * headers - TODO confirm). */
     /* Angular/regular predictors, stored in c->intra_pred[]. */
     30 decl_angular_ipred_fn(BF(dav1d_ipred_dc, neon));
     31 decl_angular_ipred_fn(BF(dav1d_ipred_dc_128, neon));
     32 decl_angular_ipred_fn(BF(dav1d_ipred_dc_top, neon));
     33 decl_angular_ipred_fn(BF(dav1d_ipred_dc_left, neon));
     34 decl_angular_ipred_fn(BF(dav1d_ipred_h, neon));
     35 decl_angular_ipred_fn(BF(dav1d_ipred_v, neon));
     36 decl_angular_ipred_fn(BF(dav1d_ipred_paeth, neon));
     37 decl_angular_ipred_fn(BF(dav1d_ipred_smooth, neon));
     38 decl_angular_ipred_fn(BF(dav1d_ipred_smooth_v, neon));
     39 decl_angular_ipred_fn(BF(dav1d_ipred_smooth_h, neon));
     40 decl_angular_ipred_fn(BF(dav1d_ipred_filter, neon));
     41 
     /* Chroma-from-luma predictors, stored in c->cfl_pred[]. */
     42 decl_cfl_pred_fn(BF(dav1d_ipred_cfl, neon));
     43 decl_cfl_pred_fn(BF(dav1d_ipred_cfl_128, neon));
     44 decl_cfl_pred_fn(BF(dav1d_ipred_cfl_top, neon));
     45 decl_cfl_pred_fn(BF(dav1d_ipred_cfl_left, neon));
     46 
     /* Chroma-from-luma AC helpers, one per pixel layout, stored in c->cfl_ac[]. */
     47 decl_cfl_ac_fn(BF(dav1d_ipred_cfl_ac_420, neon));
     48 decl_cfl_ac_fn(BF(dav1d_ipred_cfl_ac_422, neon));
     49 decl_cfl_ac_fn(BF(dav1d_ipred_cfl_ac_444, neon));
     50 
     /* Palette predictor, stored in c->pal_pred. */
     51 decl_pal_pred_fn(BF(dav1d_pal_pred, neon));
     52 
     53 #if ARCH_AARCH64
     54 void BF(dav1d_ipred_z1_upsample_edge, neon)(pixel *out, const int hsz,
     55                                            const pixel *const in,
     56                                            const int end HIGHBD_DECL_SUFFIX);
     57 void BF(dav1d_ipred_z1_filter_edge, neon)(pixel *out, const int sz,
     58                                          const pixel *const in,
     59                                          const int end, const int strength);
     60 void BF(dav1d_ipred_pixel_set, neon)(pixel *out, const pixel px,
     61                                     const int n);
     62 void BF(dav1d_ipred_z1_fill1, neon)(pixel *dst, ptrdiff_t stride,
     63                                    const pixel *const top, const int width,
     64                                    const int height, const int dx,
     65                                    const int max_base_x);
     66 void BF(dav1d_ipred_z1_fill2, neon)(pixel *dst, ptrdiff_t stride,
     67                                    const pixel *const top, const int width,
     68                                    const int height, const int dx,
     69                                    const int max_base_x);
     70 
     71 static void ipred_z1_neon(pixel *dst, const ptrdiff_t stride,
     72                          const pixel *const topleft_in,
     73                          const int width, const int height, int angle,
     74                          const int max_width, const int max_height
     75                          HIGHBD_DECL_SUFFIX)
     76 {
     77    const int is_sm = (angle >> 9) & 0x1;
     78    const int enable_intra_edge_filter = angle >> 10;
     79    angle &= 511;
     80    int dx = dav1d_dr_intra_derivative[angle >> 1];
     81    pixel top_out[64 + 64 + (64+15)*2 + 16];
     82    int max_base_x;
     83    const int upsample_above = enable_intra_edge_filter ?
     84        get_upsample(width + height, 90 - angle, is_sm) : 0;
     85    if (upsample_above) {
     86        BF(dav1d_ipred_z1_upsample_edge, neon)(top_out, width + height,
     87                                               topleft_in,
     88                                               width + imin(width, height)
     89                                               HIGHBD_TAIL_SUFFIX);
     90        max_base_x = 2 * (width + height) - 2;
     91        dx <<= 1;
     92    } else {
     93        const int filter_strength = enable_intra_edge_filter ?
     94            get_filter_strength(width + height, 90 - angle, is_sm) : 0;
     95        if (filter_strength) {
     96            BF(dav1d_ipred_z1_filter_edge, neon)(top_out, width + height,
     97                                                 topleft_in,
     98                                                 width + imin(width, height),
     99                                                 filter_strength);
    100            max_base_x = width + height - 1;
    101        } else {
    102            max_base_x = width + imin(width, height) - 1;
    103            memcpy(top_out, &topleft_in[1], (max_base_x + 1) * sizeof(pixel));
    104        }
    105    }
    106    const int base_inc = 1 + upsample_above;
    107    int pad_pixels = width + 15; // max(dx >> 6) == 15
    108    BF(dav1d_ipred_pixel_set, neon)(&top_out[max_base_x + 1],
    109                                    top_out[max_base_x], pad_pixels * base_inc);
    110    if (upsample_above)
    111        BF(dav1d_ipred_z1_fill2, neon)(dst, stride, top_out, width, height,
    112                                       dx, max_base_x);
    113    else
    114        BF(dav1d_ipred_z1_fill1, neon)(dst, stride, top_out, width, height,
    115                                       dx, max_base_x);
    116 }
    117 
    118 void BF(dav1d_ipred_reverse, neon)(pixel *dst, const pixel *const src,
    119                                   const int n);
    120 
    121 void BF(dav1d_ipred_z2_upsample_edge, neon)(pixel *out, const int sz,
    122                                            const pixel *const in
    123                                            HIGHBD_DECL_SUFFIX);
    124 
    125 void BF(dav1d_ipred_z2_fill1, neon)(pixel *dst, ptrdiff_t stride,
    126                                    const pixel *const top,
    127                                    const pixel *const left,
    128                                    const int width, const int height,
    129                                    const int dx, const int dy);
    130 void BF(dav1d_ipred_z2_fill2, neon)(pixel *dst, ptrdiff_t stride,
    131                                    const pixel *const top,
    132                                    const pixel *const left,
    133                                    const int width, const int height,
    134                                    const int dx, const int dy);
    135 void BF(dav1d_ipred_z2_fill3, neon)(pixel *dst, ptrdiff_t stride,
    136                                    const pixel *const top,
    137                                    const pixel *const left,
    138                                    const int width, const int height,
    139                                    const int dx, const int dy);
    140 
    141 static void ipred_z2_neon(pixel *dst, const ptrdiff_t stride,
    142                          const pixel *const topleft_in,
    143                          const int width, const int height, int angle,
    144                          const int max_width, const int max_height
    145                          HIGHBD_DECL_SUFFIX)
    146 {
    147    const int is_sm = (angle >> 9) & 0x1;
    148    const int enable_intra_edge_filter = angle >> 10;
    149    angle &= 511;
    150    assert(angle > 90 && angle < 180);
    151    int dy = dav1d_dr_intra_derivative[(angle - 90) >> 1];
    152    int dx = dav1d_dr_intra_derivative[(180 - angle) >> 1];
    153    const int upsample_left = enable_intra_edge_filter ?
    154        get_upsample(width + height, 180 - angle, is_sm) : 0;
    155    const int upsample_above = enable_intra_edge_filter ?
    156        get_upsample(width + height, angle - 90, is_sm) : 0;
    157    pixel buf[3*(64+1)];
    158    pixel *left = &buf[2*(64+1)];
    159    // The asm can underread below the start of top[] and left[]; to avoid
    160    // surprising behaviour, make sure this is within the allocated stack space.
    161    pixel *top = &buf[1*(64+1)];
    162    pixel *flipped = &buf[0*(64+1)];
    163 
    164    if (upsample_above) {
    165        BF(dav1d_ipred_z2_upsample_edge, neon)(top, width, topleft_in
    166                                               HIGHBD_TAIL_SUFFIX);
    167        dx <<= 1;
    168    } else {
    169        const int filter_strength = enable_intra_edge_filter ?
    170            get_filter_strength(width + height, angle - 90, is_sm) : 0;
    171 
    172        if (filter_strength) {
    173            BF(dav1d_ipred_z1_filter_edge, neon)(&top[1], imin(max_width, width),
    174                                                 topleft_in, width,
    175                                                 filter_strength);
    176            if (max_width < width)
    177                memcpy(&top[1 + max_width], &topleft_in[1 + max_width],
    178                       (width - max_width) * sizeof(pixel));
    179        } else {
    180            pixel_copy(&top[1], &topleft_in[1], width);
    181        }
    182    }
    183    if (upsample_left) {
    184        flipped[0] = topleft_in[0];
    185        BF(dav1d_ipred_reverse, neon)(&flipped[1], &topleft_in[0],
    186                                      height);
    187        BF(dav1d_ipred_z2_upsample_edge, neon)(left, height, flipped
    188                                               HIGHBD_TAIL_SUFFIX);
    189        dy <<= 1;
    190    } else {
    191        const int filter_strength = enable_intra_edge_filter ?
    192            get_filter_strength(width + height, 180 - angle, is_sm) : 0;
    193 
    194        if (filter_strength) {
    195            flipped[0] = topleft_in[0];
    196            BF(dav1d_ipred_reverse, neon)(&flipped[1], &topleft_in[0],
    197                                          height);
    198            BF(dav1d_ipred_z1_filter_edge, neon)(&left[1], imin(max_height, height),
    199                                                 flipped, height,
    200                                                 filter_strength);
    201            if (max_height < height)
    202                memcpy(&left[1 + max_height], &flipped[1 + max_height],
    203                       (height - max_height) * sizeof(pixel));
    204        } else {
    205            BF(dav1d_ipred_reverse, neon)(&left[1], &topleft_in[0],
    206                                          height);
    207        }
    208    }
    209    top[0] = left[0] = *topleft_in;
    210 
    211    assert(!(upsample_above && upsample_left));
    212    if (!upsample_above && !upsample_left) {
    213        BF(dav1d_ipred_z2_fill1, neon)(dst, stride, top, left, width, height,
    214                                       dx, dy);
    215    } else if (upsample_above) {
    216        BF(dav1d_ipred_z2_fill2, neon)(dst, stride, top, left, width, height,
    217                                       dx, dy);
    218    } else /*if (upsample_left)*/ {
    219        BF(dav1d_ipred_z2_fill3, neon)(dst, stride, top, left, width, height,
    220                                       dx, dy);
    221    }
    222 }
    223 
    224 void BF(dav1d_ipred_z3_fill1, neon)(pixel *dst, ptrdiff_t stride,
    225                                    const pixel *const left, const int width,
    226                                    const int height, const int dy,
    227                                    const int max_base_y);
    228 void BF(dav1d_ipred_z3_fill2, neon)(pixel *dst, ptrdiff_t stride,
    229                                    const pixel *const left, const int width,
    230                                    const int height, const int dy,
    231                                    const int max_base_y);
    232 
    233 static void ipred_z3_neon(pixel *dst, const ptrdiff_t stride,
    234                          const pixel *const topleft_in,
    235                          const int width, const int height, int angle,
    236                          const int max_width, const int max_height
    237                          HIGHBD_DECL_SUFFIX)
    238 {
    239    const int is_sm = (angle >> 9) & 0x1;
    240    const int enable_intra_edge_filter = angle >> 10;
    241    angle &= 511;
    242    assert(angle > 180);
    243    int dy = dav1d_dr_intra_derivative[(270 - angle) >> 1];
    244    pixel flipped[64 + 64 + 16];
    245    pixel left_out[64 + 64 + (64+15)*2];
    246    int max_base_y;
    247    const int upsample_left = enable_intra_edge_filter ?
    248        get_upsample(width + height, angle - 180, is_sm) : 0;
    249    if (upsample_left) {
    250        flipped[0] = topleft_in[0];
    251        BF(dav1d_ipred_reverse, neon)(&flipped[1], &topleft_in[0],
    252                                      height + imax(width, height));
    253        BF(dav1d_ipred_z1_upsample_edge, neon)(left_out, width + height,
    254                                               flipped,
    255                                               height + imin(width, height)
    256                                               HIGHBD_TAIL_SUFFIX);
    257        max_base_y = 2 * (width + height) - 2;
    258        dy <<= 1;
    259    } else {
    260        const int filter_strength = enable_intra_edge_filter ?
    261            get_filter_strength(width + height, angle - 180, is_sm) : 0;
    262 
    263        if (filter_strength) {
    264            flipped[0] = topleft_in[0];
    265            BF(dav1d_ipred_reverse, neon)(&flipped[1], &topleft_in[0],
    266                                          height + imax(width, height));
    267            BF(dav1d_ipred_z1_filter_edge, neon)(left_out, width + height,
    268                                                 flipped,
    269                                                 height + imin(width, height),
    270                                                 filter_strength);
    271            max_base_y = width + height - 1;
    272        } else {
    273            BF(dav1d_ipred_reverse, neon)(left_out, &topleft_in[0],
    274                                          height + imin(width, height));
    275            max_base_y = height + imin(width, height) - 1;
    276        }
    277    }
    278    const int base_inc = 1 + upsample_left;
    279    // The tbx based implementation needs left[] to have 64 bytes intitialized,
    280    // the other implementation can read height + max(dy >> 6) past the end.
    281    int pad_pixels = imax(64 - max_base_y - 1, height + 15);
    282 
    283    BF(dav1d_ipred_pixel_set, neon)(&left_out[max_base_y + 1],
    284                                    left_out[max_base_y], pad_pixels * base_inc);
    285    if (upsample_left)
    286        BF(dav1d_ipred_z3_fill2, neon)(dst, stride, left_out, width, height,
    287                                       dy, max_base_y);
    288    else
    289        BF(dav1d_ipred_z3_fill1, neon)(dst, stride, left_out, width, height,
    290                                       dy, max_base_y);
    291 }
    292 #endif
    293 
    294 static ALWAYS_INLINE void intra_pred_dsp_init_arm(Dav1dIntraPredDSPContext *const c) {
    295    const unsigned flags = dav1d_get_cpu_flags();
    296 
    297    if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
    298 
    299    c->intra_pred[DC_PRED]       = BF(dav1d_ipred_dc, neon);
    300    c->intra_pred[DC_128_PRED]   = BF(dav1d_ipred_dc_128, neon);
    301    c->intra_pred[TOP_DC_PRED]   = BF(dav1d_ipred_dc_top, neon);
    302    c->intra_pred[LEFT_DC_PRED]  = BF(dav1d_ipred_dc_left, neon);
    303    c->intra_pred[HOR_PRED]      = BF(dav1d_ipred_h, neon);
    304    c->intra_pred[VERT_PRED]     = BF(dav1d_ipred_v, neon);
    305    c->intra_pred[PAETH_PRED]    = BF(dav1d_ipred_paeth, neon);
    306    c->intra_pred[SMOOTH_PRED]   = BF(dav1d_ipred_smooth, neon);
    307    c->intra_pred[SMOOTH_V_PRED] = BF(dav1d_ipred_smooth_v, neon);
    308    c->intra_pred[SMOOTH_H_PRED] = BF(dav1d_ipred_smooth_h, neon);
    309 #if ARCH_AARCH64
    310    c->intra_pred[Z1_PRED]       = ipred_z1_neon;
    311    c->intra_pred[Z2_PRED]       = ipred_z2_neon;
    312    c->intra_pred[Z3_PRED]       = ipred_z3_neon;
    313 #endif
    314    c->intra_pred[FILTER_PRED]   = BF(dav1d_ipred_filter, neon);
    315 
    316    c->cfl_pred[DC_PRED]         = BF(dav1d_ipred_cfl, neon);
    317    c->cfl_pred[DC_128_PRED]     = BF(dav1d_ipred_cfl_128, neon);
    318    c->cfl_pred[TOP_DC_PRED]     = BF(dav1d_ipred_cfl_top, neon);
    319    c->cfl_pred[LEFT_DC_PRED]    = BF(dav1d_ipred_cfl_left, neon);
    320 
    321    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_ipred_cfl_ac_420, neon);
    322    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_ipred_cfl_ac_422, neon);
    323    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_ipred_cfl_ac_444, neon);
    324 
    325    c->pal_pred                  = BF(dav1d_pal_pred, neon);
    326 }