/* jquanti-mmi.c */
/*
 * Loongson MMI optimizations for libjpeg-turbo
 *
 * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
 *                          All Rights Reserved.
 * Authors:  ZhuChen     <zhuchen@loongson.cn>
 *           CaiWanwei   <caiwanwei@loongson.cn>
 *           SunZhangzhi <sunzhangzhi-cq@loongson.cn>
 * Copyright (C) 2018-2019, D. R. Commander.  All Rights Reserved.
 *
 * Based on the x86 SIMD extension for IJG JPEG library
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */

/* INTEGER QUANTIZATION AND SAMPLE CONVERSION */

#include "jsimd_mmi.h"


/*
 * Quantize one DCTSIZE-wide (8-coefficient) row of the workspace: the row is
 * loaded as two __m64 vectors of four 16-bit values each (rowl/rowh).  The
 * divisors table is indexed as three DCTSIZE2-element sub-tables — reciprocal
 * (offset DCTSIZE2 * 0), correction (DCTSIZE2 * 1), and scale (DCTSIZE2 * 2).
 * The computation takes the branchless absolute value of each coefficient,
 * adds the correction term, multiplies by the reciprocal keeping the high 16
 * bits (_mm_mulhi_pi16), adds back the saved pre-multiply value (the
 * reciprocal's MSB is always set — see comment below), multiplies by the
 * scale with two sign-dependent conditional adds, and finally restores the
 * original sign before storing to output_ptr.  As a side effect, workspace,
 * divisors, and output_ptr each advance by DCTSIZE so that consecutive
 * invocations walk through the whole block row by row.
 *
 * NOTE(review): the exact rounding contract of the reciprocal/correction/
 * scale representation is defined by the table generator elsewhere in the
 * project — confirm against the x86 SIMD quantization it is based on.
 */
#define DO_QUANT() { \
  __m64 rowl, rowh, rowls, rowhs, rowlsave, rowhsave; \
  __m64 corrl, corrh, recipl, reciph, scalel, scaleh; \
  \
  rowl = _mm_load_si64((__m64 *)&workspace[0]); \
  rowh = _mm_load_si64((__m64 *)&workspace[4]); \
  \
  /* Branch-less absolute value */ \
  rowls = _mm_srai_pi16(rowl, (WORD_BIT - 1));  /* -1 if value < 0, */ \
                                                /* 0 otherwise */ \
  rowhs = _mm_srai_pi16(rowh, (WORD_BIT - 1)); \
  \
  rowl = _mm_xor_si64(rowl, rowls);   /* val = -val */ \
  rowh = _mm_xor_si64(rowh, rowhs); \
  rowl = _mm_sub_pi16(rowl, rowls); \
  rowh = _mm_sub_pi16(rowh, rowhs); \
  \
  corrl = _mm_load_si64((__m64 *)&divisors[DCTSIZE2 * 1]);  /* correction */ \
  corrh = _mm_load_si64((__m64 *)&divisors[DCTSIZE2 * 1 + 4]); \
  \
  rowlsave = rowl = _mm_add_pi16(rowl, corrl);  /* correction + roundfactor */ \
  rowhsave = rowh = _mm_add_pi16(rowh, corrh); \
  \
  recipl = _mm_load_si64((__m64 *)&divisors[DCTSIZE2 * 0]);  /* reciprocal */ \
  reciph = _mm_load_si64((__m64 *)&divisors[DCTSIZE2 * 0 + 4]); \
  \
  rowl = _mm_mulhi_pi16(rowl, recipl); \
  rowh = _mm_mulhi_pi16(rowh, reciph); \
  \
  /* reciprocal is always negative (MSB=1), so we always need to add the */ \
  /* initial value (input value is never negative as we inverted it at the */ \
  /* start of this routine) */ \
  rowlsave = rowl = _mm_add_pi16(rowl, rowlsave); \
  rowhsave = rowh = _mm_add_pi16(rowh, rowhsave); \
  \
  scalel = _mm_load_si64((__m64 *)&divisors[DCTSIZE2 * 2]);  /* scale */ \
  scaleh = _mm_load_si64((__m64 *)&divisors[DCTSIZE2 * 2 + 4]); \
  \
  rowl = _mm_mulhi_pi16(rowl, scalel); \
  rowh = _mm_mulhi_pi16(rowh, scaleh); \
  \
  /* determine if scale is negative */ \
  scalel = _mm_srai_pi16(scalel, (WORD_BIT - 1)); \
  scaleh = _mm_srai_pi16(scaleh, (WORD_BIT - 1)); \
  \
  /* and add input if it is */ \
  scalel = _mm_and_si64(scalel, rowlsave); \
  scaleh = _mm_and_si64(scaleh, rowhsave); \
  rowl = _mm_add_pi16(rowl, scalel); \
  rowh = _mm_add_pi16(rowh, scaleh); \
  \
  /* then check if negative input */ \
  rowlsave = _mm_srai_pi16(rowlsave, (WORD_BIT - 1)); \
  rowhsave = _mm_srai_pi16(rowhsave, (WORD_BIT - 1)); \
  \
  /* and add scale if it is */ \
  rowlsave = _mm_and_si64(rowlsave, scalel); \
  rowhsave = _mm_and_si64(rowhsave, scaleh); \
  rowl = _mm_add_pi16(rowl, rowlsave); \
  rowh = _mm_add_pi16(rowh, rowhsave); \
  \
  rowl = _mm_xor_si64(rowl, rowls);   /* val = -val */ \
  rowh = _mm_xor_si64(rowh, rowhs); \
  rowl = _mm_sub_pi16(rowl, rowls); \
  rowh = _mm_sub_pi16(rowh, rowhs); \
  \
  _mm_store_si64((__m64 *)&output_ptr[0], rowl); \
  _mm_store_si64((__m64 *)&output_ptr[4], rowh); \
  \
  workspace += DCTSIZE; \
  divisors += DCTSIZE; \
  output_ptr += DCTSIZE; \
}


/*
 * Quantize a full block of DCTSIZE2 (64) DCT coefficients from workspace
 * into coef_block.  divisors points to the three concatenated
 * DCTSIZE2-element sub-tables (reciprocal, correction, scale) consumed by
 * DO_QUANT().  The eight macro invocations below form a fully unrolled loop,
 * one per DCTSIZE-coefficient row; each invocation advances workspace,
 * divisors, and output_ptr by DCTSIZE as a side effect, so the sequence
 * covers the entire block.
 */
void jsimd_quantize_mmi(JCOEFPTR coef_block, DCTELEM *divisors,
                        DCTELEM *workspace)
{
  JCOEFPTR output_ptr = coef_block;

  DO_QUANT()
  DO_QUANT()
  DO_QUANT()
  DO_QUANT()
  DO_QUANT()
  DO_QUANT()
  DO_QUANT()
  DO_QUANT()
}