tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

encodetxb_sse2.c (20545B)


      1 /*
      2 * Copyright (c) 2017, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <assert.h>
     13 #include <emmintrin.h>  // SSE2
     14 
     15 #include "aom/aom_integer.h"
     16 #include "aom_dsp/x86/mem_sse2.h"
     17 #include "av1/common/av1_common_int.h"
     18 #include "av1/common/txb_common.h"
     19 
     20 static inline void load_levels_4x4x5_sse2(const uint8_t *const src,
     21                                          const int stride,
     22                                          const ptrdiff_t *const offsets,
     23                                          __m128i *const level) {
     24  level[0] = load_8bit_4x4_to_1_reg_sse2(src + 1, stride);
     25  level[1] = load_8bit_4x4_to_1_reg_sse2(src + stride, stride);
     26  level[2] = load_8bit_4x4_to_1_reg_sse2(src + offsets[0], stride);
     27  level[3] = load_8bit_4x4_to_1_reg_sse2(src + offsets[1], stride);
     28  level[4] = load_8bit_4x4_to_1_reg_sse2(src + offsets[2], stride);
     29 }
     30 
     31 static inline void load_levels_8x2x5_sse2(const uint8_t *const src,
     32                                          const int stride,
     33                                          const ptrdiff_t *const offsets,
     34                                          __m128i *const level) {
     35  level[0] = load_8bit_8x2_to_1_reg_sse2(src + 1, stride);
     36  level[1] = load_8bit_8x2_to_1_reg_sse2(src + stride, stride);
     37  level[2] = load_8bit_8x2_to_1_reg_sse2(src + offsets[0], stride);
     38  level[3] = load_8bit_8x2_to_1_reg_sse2(src + offsets[1], stride);
     39  level[4] = load_8bit_8x2_to_1_reg_sse2(src + offsets[2], stride);
     40 }
     41 
     42 static inline void load_levels_16x1x5_sse2(const uint8_t *const src,
     43                                           const int stride,
     44                                           const ptrdiff_t *const offsets,
     45                                           __m128i *const level) {
     46  level[0] = _mm_loadu_si128((__m128i *)(src + 1));
     47  level[1] = _mm_loadu_si128((__m128i *)(src + stride));
     48  level[2] = _mm_loadu_si128((__m128i *)(src + offsets[0]));
     49  level[3] = _mm_loadu_si128((__m128i *)(src + offsets[1]));
     50  level[4] = _mm_loadu_si128((__m128i *)(src + offsets[2]));
     51 }
     52 
     53 static inline __m128i get_coeff_contexts_kernel_sse2(__m128i *const level) {
     54  const __m128i const_3 = _mm_set1_epi8(3);
     55  const __m128i const_4 = _mm_set1_epi8(4);
     56  __m128i count;
     57 
     58  count = _mm_min_epu8(level[0], const_3);
     59  level[1] = _mm_min_epu8(level[1], const_3);
     60  level[2] = _mm_min_epu8(level[2], const_3);
     61  level[3] = _mm_min_epu8(level[3], const_3);
     62  level[4] = _mm_min_epu8(level[4], const_3);
     63  count = _mm_add_epi8(count, level[1]);
     64  count = _mm_add_epi8(count, level[2]);
     65  count = _mm_add_epi8(count, level[3]);
     66  count = _mm_add_epi8(count, level[4]);
     67  count = _mm_avg_epu8(count, _mm_setzero_si128());
     68  count = _mm_min_epu8(count, const_4);
     69  return count;
     70 }
     71 
     72 static inline void get_4_nz_map_contexts_2d(const uint8_t *levels,
     73                                            const int width,
     74                                            const ptrdiff_t *const offsets,
     75                                            int8_t *const coeff_contexts) {
     76  const int stride = 4 + TX_PAD_HOR;
     77  const __m128i pos_to_offset_large = _mm_set1_epi8(21);
     78  __m128i pos_to_offset =
     79      (width == 4)
     80          ? _mm_setr_epi8(0, 1, 6, 6, 1, 6, 6, 21, 6, 6, 21, 21, 6, 21, 21, 21)
     81          : _mm_setr_epi8(0, 16, 16, 16, 16, 16, 16, 16, 6, 6, 21, 21, 6, 21,
     82                          21, 21);
     83  __m128i count;
     84  __m128i level[5];
     85  int8_t *cc = coeff_contexts;
     86  int col = width;
     87 
     88  assert(!(width % 4));
     89 
     90  do {
     91    load_levels_4x4x5_sse2(levels, stride, offsets, level);
     92    count = get_coeff_contexts_kernel_sse2(level);
     93    count = _mm_add_epi8(count, pos_to_offset);
     94    _mm_store_si128((__m128i *)cc, count);
     95    pos_to_offset = pos_to_offset_large;
     96    levels += 4 * stride;
     97    cc += 16;
     98    col -= 4;
     99  } while (col);
    100 
    101  coeff_contexts[0] = 0;
    102 }
    103 
    104 static inline void get_4_nz_map_contexts_ver(const uint8_t *levels,
    105                                             const int width,
    106                                             const ptrdiff_t *const offsets,
    107                                             int8_t *coeff_contexts) {
    108  const int stride = 4 + TX_PAD_HOR;
    109  const __m128i pos_to_offset =
    110      _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
    111                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    112                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
    113                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    114                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
    115                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    116                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
    117                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
    118  __m128i count;
    119  __m128i level[5];
    120  int col = width;
    121 
    122  assert(!(width % 4));
    123 
    124  do {
    125    load_levels_4x4x5_sse2(levels, stride, offsets, level);
    126    count = get_coeff_contexts_kernel_sse2(level);
    127    count = _mm_add_epi8(count, pos_to_offset);
    128    _mm_store_si128((__m128i *)coeff_contexts, count);
    129    levels += 4 * stride;
    130    coeff_contexts += 16;
    131    col -= 4;
    132  } while (col);
    133 }
    134 
    135 static inline void get_4_nz_map_contexts_hor(const uint8_t *levels,
    136                                             const int width,
    137                                             const ptrdiff_t *const offsets,
    138                                             int8_t *coeff_contexts) {
    139  const int stride = 4 + TX_PAD_HOR;
    140  const __m128i pos_to_offset_large = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
    141  __m128i pos_to_offset =
    142      _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
    143                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
    144                    SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
    145                    SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
    146                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    147                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    148                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    149                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
    150  __m128i count;
    151  __m128i level[5];
    152  int col = width;
    153 
    154  assert(!(width % 4));
    155 
    156  do {
    157    load_levels_4x4x5_sse2(levels, stride, offsets, level);
    158    count = get_coeff_contexts_kernel_sse2(level);
    159    count = _mm_add_epi8(count, pos_to_offset);
    160    _mm_store_si128((__m128i *)coeff_contexts, count);
    161    pos_to_offset = pos_to_offset_large;
    162    levels += 4 * stride;
    163    coeff_contexts += 16;
    164    col -= 4;
    165  } while (col);
    166 }
    167 
    168 static inline void get_8_coeff_contexts_2d(const uint8_t *levels,
    169                                           const int width,
    170                                           const ptrdiff_t *const offsets,
    171                                           int8_t *coeff_contexts) {
    172  const int stride = 8 + TX_PAD_HOR;
    173  int8_t *cc = coeff_contexts;
    174  int col = width;
    175  __m128i count;
    176  __m128i level[5];
    177  __m128i pos_to_offset[3];
    178 
    179  assert(!(width % 2));
    180 
    181  if (width == 8) {
    182    pos_to_offset[0] =
    183        _mm_setr_epi8(0, 1, 6, 6, 21, 21, 21, 21, 1, 6, 6, 21, 21, 21, 21, 21);
    184    pos_to_offset[1] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21,
    185                                     21, 21, 21, 21, 21);
    186  } else if (width < 8) {
    187    pos_to_offset[0] = _mm_setr_epi8(0, 11, 6, 6, 21, 21, 21, 21, 11, 11, 6, 21,
    188                                     21, 21, 21, 21);
    189    pos_to_offset[1] = _mm_setr_epi8(11, 11, 21, 21, 21, 21, 21, 21, 11, 11, 21,
    190                                     21, 21, 21, 21, 21);
    191  } else {
    192    pos_to_offset[0] = _mm_setr_epi8(0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    193                                     16, 16, 16, 16, 16);
    194    pos_to_offset[1] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21,
    195                                     21, 21, 21, 21, 21);
    196  }
    197  pos_to_offset[2] = _mm_set1_epi8(21);
    198 
    199  do {
    200    load_levels_8x2x5_sse2(levels, stride, offsets, level);
    201    count = get_coeff_contexts_kernel_sse2(level);
    202    count = _mm_add_epi8(count, pos_to_offset[0]);
    203    _mm_store_si128((__m128i *)cc, count);
    204    pos_to_offset[0] = pos_to_offset[1];
    205    pos_to_offset[1] = pos_to_offset[2];
    206    levels += 2 * stride;
    207    cc += 16;
    208    col -= 2;
    209  } while (col);
    210 
    211  coeff_contexts[0] = 0;
    212 }
    213 
    214 static inline void get_8_coeff_contexts_ver(const uint8_t *levels,
    215                                            const int width,
    216                                            const ptrdiff_t *const offsets,
    217                                            int8_t *coeff_contexts) {
    218  const int stride = 8 + TX_PAD_HOR;
    219  const __m128i pos_to_offset =
    220      _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
    221                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    222                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    223                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    224                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
    225                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    226                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    227                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
    228  int col = width;
    229  __m128i count;
    230  __m128i level[5];
    231 
    232  assert(!(width % 2));
    233 
    234  do {
    235    load_levels_8x2x5_sse2(levels, stride, offsets, level);
    236    count = get_coeff_contexts_kernel_sse2(level);
    237    count = _mm_add_epi8(count, pos_to_offset);
    238    _mm_store_si128((__m128i *)coeff_contexts, count);
    239    levels += 2 * stride;
    240    coeff_contexts += 16;
    241    col -= 2;
    242  } while (col);
    243 }
    244 
    245 static inline void get_8_coeff_contexts_hor(const uint8_t *levels,
    246                                            const int width,
    247                                            const ptrdiff_t *const offsets,
    248                                            int8_t *coeff_contexts) {
    249  const int stride = 8 + TX_PAD_HOR;
    250  const __m128i pos_to_offset_large = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
    251  __m128i pos_to_offset =
    252      _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
    253                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
    254                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
    255                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
    256                    SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
    257                    SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
    258                    SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
    259                    SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5);
    260  int col = width;
    261  __m128i count;
    262  __m128i level[5];
    263 
    264  assert(!(width % 2));
    265 
    266  do {
    267    load_levels_8x2x5_sse2(levels, stride, offsets, level);
    268    count = get_coeff_contexts_kernel_sse2(level);
    269    count = _mm_add_epi8(count, pos_to_offset);
    270    _mm_store_si128((__m128i *)coeff_contexts, count);
    271    pos_to_offset = pos_to_offset_large;
    272    levels += 2 * stride;
    273    coeff_contexts += 16;
    274    col -= 2;
    275  } while (col);
    276 }
    277 
    278 static inline void get_16n_coeff_contexts_2d(const uint8_t *levels,
    279                                             const int real_width,
    280                                             const int real_height,
    281                                             const int width, const int height,
    282                                             const ptrdiff_t *const offsets,
    283                                             int8_t *coeff_contexts) {
    284  const int stride = height + TX_PAD_HOR;
    285  int8_t *cc = coeff_contexts;
    286  int col = width;
    287  __m128i pos_to_offset[5];
    288  __m128i pos_to_offset_large[3];
    289  __m128i count;
    290  __m128i level[5];
    291 
    292  assert(!(height % 16));
    293 
    294  pos_to_offset_large[2] = _mm_set1_epi8(21);
    295  if (real_width == real_height) {
    296    pos_to_offset[0] = _mm_setr_epi8(0, 1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21,
    297                                     21, 21, 21, 21);
    298    pos_to_offset[1] = _mm_setr_epi8(1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21,
    299                                     21, 21, 21, 21, 21);
    300    pos_to_offset[2] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21,
    301                                     21, 21, 21, 21, 21);
    302    pos_to_offset[3] = _mm_setr_epi8(6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
    303                                     21, 21, 21, 21, 21);
    304    pos_to_offset[4] = pos_to_offset_large[0] = pos_to_offset_large[1] =
    305        pos_to_offset_large[2];
    306  } else if (real_width < real_height) {
    307    pos_to_offset[0] = _mm_setr_epi8(0, 11, 6, 6, 21, 21, 21, 21, 21, 21, 21,
    308                                     21, 21, 21, 21, 21);
    309    pos_to_offset[1] = _mm_setr_epi8(11, 11, 6, 21, 21, 21, 21, 21, 21, 21, 21,
    310                                     21, 21, 21, 21, 21);
    311    pos_to_offset[2] = pos_to_offset[3] = pos_to_offset[4] = _mm_setr_epi8(
    312        11, 11, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21);
    313    pos_to_offset_large[0] = pos_to_offset_large[1] = pos_to_offset_large[2];
    314  } else {  // real_width > real_height
    315    pos_to_offset[0] = pos_to_offset[1] = _mm_setr_epi8(
    316        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16);
    317    pos_to_offset[2] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21,
    318                                     21, 21, 21, 21, 21);
    319    pos_to_offset[3] = _mm_setr_epi8(6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
    320                                     21, 21, 21, 21, 21);
    321    pos_to_offset[4] = pos_to_offset_large[2];
    322    pos_to_offset_large[0] = pos_to_offset_large[1] = _mm_set1_epi8(16);
    323  }
    324 
    325  do {
    326    int h = height;
    327 
    328    do {
    329      load_levels_16x1x5_sse2(levels, stride, offsets, level);
    330      count = get_coeff_contexts_kernel_sse2(level);
    331      count = _mm_add_epi8(count, pos_to_offset[0]);
    332      _mm_store_si128((__m128i *)cc, count);
    333      levels += 16;
    334      cc += 16;
    335      h -= 16;
    336      pos_to_offset[0] = pos_to_offset_large[0];
    337    } while (h);
    338 
    339    pos_to_offset[0] = pos_to_offset[1];
    340    pos_to_offset[1] = pos_to_offset[2];
    341    pos_to_offset[2] = pos_to_offset[3];
    342    pos_to_offset[3] = pos_to_offset[4];
    343    pos_to_offset_large[0] = pos_to_offset_large[1];
    344    pos_to_offset_large[1] = pos_to_offset_large[2];
    345    levels += TX_PAD_HOR;
    346  } while (--col);
    347 
    348  coeff_contexts[0] = 0;
    349 }
    350 
    351 static inline void get_16n_coeff_contexts_ver(const uint8_t *levels,
    352                                              const int width, const int height,
    353                                              const ptrdiff_t *const offsets,
    354                                              int8_t *coeff_contexts) {
    355  const int stride = height + TX_PAD_HOR;
    356  const __m128i pos_to_offset_large =
    357      _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    358                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    359                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    360                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    361                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    362                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    363                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    364                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
    365  __m128i count;
    366  __m128i level[5];
    367  int col = width;
    368 
    369  assert(!(height % 16));
    370 
    371  do {
    372    __m128i pos_to_offset =
    373        _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
    374                      SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    375                      SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    376                      SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    377                      SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    378                      SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    379                      SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    380                      SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
    381    int h = height;
    382 
    383    do {
    384      load_levels_16x1x5_sse2(levels, stride, offsets, level);
    385      count = get_coeff_contexts_kernel_sse2(level);
    386      count = _mm_add_epi8(count, pos_to_offset);
    387      _mm_store_si128((__m128i *)coeff_contexts, count);
    388      pos_to_offset = pos_to_offset_large;
    389      levels += 16;
    390      coeff_contexts += 16;
    391      h -= 16;
    392    } while (h);
    393 
    394    levels += TX_PAD_HOR;
    395  } while (--col);
    396 }
    397 
    398 static inline void get_16n_coeff_contexts_hor(const uint8_t *levels,
    399                                              const int width, const int height,
    400                                              const ptrdiff_t *const offsets,
    401                                              int8_t *coeff_contexts) {
    402  const int stride = height + TX_PAD_HOR;
    403  __m128i pos_to_offset[3];
    404  __m128i count;
    405  __m128i level[5];
    406  int col = width;
    407 
    408  assert(!(height % 16));
    409 
    410  pos_to_offset[0] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 0);
    411  pos_to_offset[1] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 5);
    412  pos_to_offset[2] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
    413 
    414  do {
    415    int h = height;
    416 
    417    do {
    418      load_levels_16x1x5_sse2(levels, stride, offsets, level);
    419      count = get_coeff_contexts_kernel_sse2(level);
    420      count = _mm_add_epi8(count, pos_to_offset[0]);
    421      _mm_store_si128((__m128i *)coeff_contexts, count);
    422      levels += 16;
    423      coeff_contexts += 16;
    424      h -= 16;
    425    } while (h);
    426 
    427    pos_to_offset[0] = pos_to_offset[1];
    428    pos_to_offset[1] = pos_to_offset[2];
    429    levels += TX_PAD_HOR;
    430  } while (--col);
    431 }
    432 
    433 // Note: levels[] must be in the range [0, 127], inclusive.
    434 void av1_get_nz_map_contexts_sse2(const uint8_t *const levels,
    435                                  const int16_t *const scan, const uint16_t eob,
    436                                  const TX_SIZE tx_size,
    437                                  const TX_CLASS tx_class,
    438                                  int8_t *const coeff_contexts) {
    439  const int last_idx = eob - 1;
    440  if (!last_idx) {
    441    coeff_contexts[0] = 0;
    442    return;
    443  }
    444 
    445  const int real_width = tx_size_wide[tx_size];
    446  const int real_height = tx_size_high[tx_size];
    447  const int width = get_txb_wide(tx_size);
    448  const int height = get_txb_high(tx_size);
    449  const int stride = height + TX_PAD_HOR;
    450  ptrdiff_t offsets[3];
    451 
    452  /* coeff_contexts must be 16 byte aligned. */
    453  assert(!((intptr_t)coeff_contexts & 0xf));
    454 
    455  if (tx_class == TX_CLASS_2D) {
    456    offsets[0] = 0 * stride + 2;
    457    offsets[1] = 1 * stride + 1;
    458    offsets[2] = 2 * stride + 0;
    459 
    460    if (height == 4) {
    461      get_4_nz_map_contexts_2d(levels, width, offsets, coeff_contexts);
    462    } else if (height == 8) {
    463      get_8_coeff_contexts_2d(levels, width, offsets, coeff_contexts);
    464    } else if (height == 16) {
    465      get_16n_coeff_contexts_2d(levels, real_width, real_height, width, height,
    466                                offsets, coeff_contexts);
    467    } else {
    468      get_16n_coeff_contexts_2d(levels, real_width, real_height, width, height,
    469                                offsets, coeff_contexts);
    470    }
    471  } else if (tx_class == TX_CLASS_HORIZ) {
    472    offsets[0] = 2 * stride;
    473    offsets[1] = 3 * stride;
    474    offsets[2] = 4 * stride;
    475    if (height == 4) {
    476      get_4_nz_map_contexts_hor(levels, width, offsets, coeff_contexts);
    477    } else if (height == 8) {
    478      get_8_coeff_contexts_hor(levels, width, offsets, coeff_contexts);
    479    } else {
    480      get_16n_coeff_contexts_hor(levels, width, height, offsets,
    481                                 coeff_contexts);
    482    }
    483  } else {  // TX_CLASS_VERT
    484    offsets[0] = 2;
    485    offsets[1] = 3;
    486    offsets[2] = 4;
    487    if (height == 4) {
    488      get_4_nz_map_contexts_ver(levels, width, offsets, coeff_contexts);
    489    } else if (height == 8) {
    490      get_8_coeff_contexts_ver(levels, width, offsets, coeff_contexts);
    491    } else {
    492      get_16n_coeff_contexts_ver(levels, width, height, offsets,
    493                                 coeff_contexts);
    494    }
    495  }
    496 
    497  const int bhl = get_txb_bhl(tx_size);
    498  const int pos = scan[last_idx];
    499  if (last_idx <= (width << bhl) / 8)
    500    coeff_contexts[pos] = 1;
    501  else if (last_idx <= (width << bhl) / 4)
    502    coeff_contexts[pos] = 2;
    503  else
    504    coeff_contexts[pos] = 3;
    505 }