fixed_dsp.h (7829B)
1 /* 2 * Copyright (c) 2012 3 * MIPS Technologies, Inc., California. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its 14 * contributors may be used to endorse or promote products derived from 15 * this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * Author: Nedeljko Babic (nbabic@mips.com) 30 * 31 * This file is part of FFmpeg. 32 * 33 * FFmpeg is free software; you can redistribute it and/or 34 * modify it under the terms of the GNU Lesser General Public 35 * License as published by the Free Software Foundation; either 36 * version 2.1 of the License, or (at your option) any later version. 37 * 38 * FFmpeg is distributed in the hope that it will be useful, 39 * but WITHOUT ANY WARRANTY; without even the implied warranty of 40 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 41 * Lesser General Public License for more details. 42 * 43 * You should have received a copy of the GNU Lesser General Public 44 * License along with FFmpeg; if not, write to the Free Software 45 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 46 */ 47 48 #ifndef AVUTIL_FIXED_DSP_H 49 #define AVUTIL_FIXED_DSP_H 50 51 #include <stdint.h> 52 #include "attributes.h" 53 #include "libavcodec/mathops.h" 54 55 typedef struct AVFixedDSPContext { 56 /* Assume len is a multiple of 16, and arrays are 32-byte aligned */ 57 /* Results of multiplications are scaled down by 31 bit (and rounded) if not 58 * stated otherwise */ 59 60 /** 61 * Overlap/add with window function. 62 * Result is scaled down by "bits" bits. 63 * Used primarily by MDCT-based audio codecs. 64 * Source and destination vectors must overlap exactly or not at all. 65 * 66 * @param dst result vector 67 * constraints: 16-byte aligned 68 * @param src0 first source vector 69 * constraints: 16-byte aligned 70 * @param src1 second source vector 71 * constraints: 16-byte aligned 72 * @param win half-window vector 73 * constraints: 16-byte aligned 74 * @param len length of vector 75 * constraints: multiple of 4 76 * @param bits scaling parameter 77 * 78 */ 79 void (*vector_fmul_window_scaled)(int16_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len, uint8_t bits); 80 81 /** 82 * Overlap/add with window function. 83 * Used primarily by MDCT-based audio codecs. 84 * Source and destination vectors must overlap exactly or not at all. 85 * 86 * @param dst result vector 87 * constraints: 32-byte aligned 88 * @param src0 first source vector 89 * constraints: 16-byte aligned 90 * @param src1 second source vector 91 * constraints: 16-byte aligned 92 * @param win half-window vector 93 * constraints: 16-byte aligned 94 * @param len length of vector 95 * constraints: multiple of 4 96 */ 97 void (*vector_fmul_window)(int32_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len); 98 99 /** 100 * Fixed-point multiplication that calculates the entry wise product of two 101 * vectors of integers and stores the result in a vector of integers. 102 * 103 * @param dst output vector 104 * constraints: 32-byte aligned 105 * @param src0 first input vector 106 * constraints: 32-byte aligned 107 * @param src1 second input vector 108 * constraints: 32-byte aligned 109 * @param len number of elements in the input 110 * constraints: multiple of 16 111 */ 112 void (*vector_fmul)(int *dst, const int *src0, const int *src1, 113 int len); 114 115 void (*vector_fmul_reverse)(int *dst, const int *src0, const int *src1, int len); 116 /** 117 * Calculate the entry wise product of two vectors of integers, add a third vector of 118 * integers and store the result in a vector of integers. 119 * 120 * @param dst output vector 121 * constraints: 32-byte aligned 122 * @param src0 first input vector 123 * constraints: 32-byte aligned 124 * @param src1 second input vector 125 * constraints: 32-byte aligned 126 * @param src2 third input vector 127 * constraints: 32-byte aligned 128 * @param len number of elements in the input 129 * constraints: multiple of 16 130 */ 131 void (*vector_fmul_add)(int *dst, const int *src0, const int *src1, 132 const int *src2, int len); 133 134 /** 135 * Calculate the scalar product of two vectors of integers. 136 * 137 * @param v1 first vector, 16-byte aligned 138 * @param v2 second vector, 16-byte aligned 139 * @param len length of vectors, multiple of 4 140 * 141 * @return sum of elementwise products 142 */ 143 int (*scalarproduct_fixed)(const int *v1, const int *v2, int len); 144 145 /** 146 * Calculate the sum and difference of two vectors of integers. 147 * 148 * @param v1 first input vector, sum output, 16-byte aligned 149 * @param v2 second input vector, difference output, 16-byte aligned 150 * @param len length of vectors, multiple of 4 151 */ 152 void (*butterflies_fixed)(int *restrict v1, int *restrict v2, int len); 153 } AVFixedDSPContext; 154 155 /** 156 * Allocate and initialize a fixed DSP context. 157 * note: should be freed with a av_free call when no longer needed. 158 * 159 * @param strict setting to non-zero avoids using functions which may not be IEEE-754 compliant 160 */ 161 AVFixedDSPContext * avpriv_alloc_fixed_dsp(int strict); 162 163 void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp); 164 void ff_fixed_dsp_init_x86(AVFixedDSPContext *fdsp); 165 166 /** 167 * Calculate the square root 168 * 169 * @param x input fixed point number 170 * 171 * @param bits format of fixed point number (32 - bits).bits 172 * 173 * note: input is normalized to (0, 1) fixed point value 174 */ 175 176 static av_always_inline int fixed_sqrt(int x, int bits) 177 { 178 int retval, bit_mask, guess, square, i; 179 int64_t accu; 180 int shift1 = 30 - bits; 181 int shift2 = bits - 15; 182 183 if (shift1 > 0) retval = ff_sqrt(x << shift1); 184 else retval = ff_sqrt(x >> -shift1); 185 186 if (shift2 > 0) { 187 retval = retval << shift2; 188 bit_mask = (1 << (shift2 - 1)); 189 190 for (i=0; i<shift2; i++){ 191 guess = retval + bit_mask; 192 accu = (int64_t)guess * guess; 193 square = (int)((accu + bit_mask) >> bits); 194 if (x >= square) 195 retval += bit_mask; 196 bit_mask >>= 1; 197 } 198 199 } 200 else retval >>= (-shift2); 201 202 return retval; 203 } 204 205 #endif /* AVUTIL_FIXED_DSP_H */