/* mpv_sparc.c (5482B) */
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #include "vis_proto.h" 6 7 /***************************************************************/ 8 9 typedef int t_s32; 10 typedef unsigned int t_u32; 11 #if defined(__sparcv9) 12 typedef long t_s64; 13 typedef unsigned long t_u64; 14 #else 15 typedef long long t_s64; 16 typedef unsigned long long t_u64; 17 #endif 18 typedef double t_d64; 19 20 /***************************************************************/ 21 22 typedef union { 23 t_d64 d64; 24 struct { 25 t_s32 i0; 26 t_s32 i1; 27 } i32s; 28 } d64_2_i32; 29 30 /***************************************************************/ 31 32 #define BUFF_SIZE 256 33 34 #define A_BITS 19 35 #define A_MASK ((1 << A_BITS) - 1) 36 37 /***************************************************************/ 38 39 static t_u64 mask_cnst[] = { 40 0x8000000080000000ull 41 }; 42 43 /***************************************************************/ 44 45 #define DEF_VARS(N) \ 46 t_d64 *py = (t_d64 *)y; \ 47 t_d64 mask = *((t_d64 *)mask_cnst); \ 48 t_d64 ca = (1u << 31) - 1; \ 49 t_d64 da = (t_d64)a; \ 50 t_s64 buff[N], s; \ 51 d64_2_i32 dy 52 53 /***************************************************************/ 54 55 #define MUL_U32_S64_2(i) \ 56 dy.d64 = vis_fxnor(mask, py[i]); \ 57 buff[2 * (i)] = (ca - (t_d64)dy.i32s.i0) * da; \ 58 buff[2 * (i) + 1] = (ca - (t_d64)dy.i32s.i1) * da 59 60 #define MUL_U32_S64_2_D(i) \ 61 dy.d64 = vis_fxnor(mask, py[i]); \ 62 d0 = ca - (t_d64)dy.i32s.i0; \ 63 d1 = ca - (t_d64)dy.i32s.i1; \ 64 buff[4 * (i)] = (t_s64)(d0 * da); \ 65 buff[4 * (i) + 1] = (t_s64)(d0 * db); \ 66 buff[4 * (i) + 2] = (t_s64)(d1 * da); \ 67 buff[4 * (i) + 3] = (t_s64)(d1 * db) 68 69 /***************************************************************/ 70 71 #define ADD_S64_U32(i) \ 72 s = buff[i] + x[i] + c; \ 73 z[i] = s; \ 74 c = 
(s >> 32) 75 76 #define ADD_S64_U32_D(i) \ 77 s = buff[2 * (i)] + (((t_s64)(buff[2 * (i) + 1])) << A_BITS) + x[i] + uc; \ 78 z[i] = s; \ 79 uc = ((t_u64)s >> 32) 80 81 /***************************************************************/ 82 83 #define MUL_U32_S64_8(i) \ 84 MUL_U32_S64_2(i); \ 85 MUL_U32_S64_2(i + 1); \ 86 MUL_U32_S64_2(i + 2); \ 87 MUL_U32_S64_2(i + 3) 88 89 #define MUL_U32_S64_D_8(i) \ 90 MUL_U32_S64_2_D(i); \ 91 MUL_U32_S64_2_D(i + 1); \ 92 MUL_U32_S64_2_D(i + 2); \ 93 MUL_U32_S64_2_D(i + 3) 94 95 /***************************************************************/ 96 97 #define ADD_S64_U32_8(i) \ 98 ADD_S64_U32(i); \ 99 ADD_S64_U32(i + 1); \ 100 ADD_S64_U32(i + 2); \ 101 ADD_S64_U32(i + 3); \ 102 ADD_S64_U32(i + 4); \ 103 ADD_S64_U32(i + 5); \ 104 ADD_S64_U32(i + 6); \ 105 ADD_S64_U32(i + 7) 106 107 #define ADD_S64_U32_D_8(i) \ 108 ADD_S64_U32_D(i); \ 109 ADD_S64_U32_D(i + 1); \ 110 ADD_S64_U32_D(i + 2); \ 111 ADD_S64_U32_D(i + 3); \ 112 ADD_S64_U32_D(i + 4); \ 113 ADD_S64_U32_D(i + 5); \ 114 ADD_S64_U32_D(i + 6); \ 115 ADD_S64_U32_D(i + 7) 116 117 /***************************************************************/ 118 119 t_u32 120 mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a) 121 { 122 if (a < (1 << A_BITS)) { 123 124 if (n == 8) { 125 DEF_VARS(8); 126 t_s32 c = 0; 127 128 MUL_U32_S64_8(0); 129 ADD_S64_U32_8(0); 130 131 return c; 132 133 } else if (n == 16) { 134 DEF_VARS(16); 135 t_s32 c = 0; 136 137 MUL_U32_S64_8(0); 138 MUL_U32_S64_8(4); 139 ADD_S64_U32_8(0); 140 ADD_S64_U32_8(8); 141 142 return c; 143 144 } else { 145 DEF_VARS(BUFF_SIZE); 146 t_s32 i, c = 0; 147 148 #pragma pipeloop(0) 149 for (i = 0; i < (n + 1) / 2; i++) { 150 MUL_U32_S64_2(i); 151 } 152 153 #pragma pipeloop(0) 154 for (i = 0; i < n; i++) { 155 ADD_S64_U32(i); 156 } 157 158 return c; 159 } 160 } else { 161 162 if (n == 8) { 163 DEF_VARS(2 * 8); 164 t_d64 d0, d1, db; 165 t_u32 uc = 0; 166 167 da = (t_d64)(a & A_MASK); 168 db = (t_d64)(a >> A_BITS); 169 170 
MUL_U32_S64_D_8(0); 171 ADD_S64_U32_D_8(0); 172 173 return uc; 174 175 } else if (n == 16) { 176 DEF_VARS(2 * 16); 177 t_d64 d0, d1, db; 178 t_u32 uc = 0; 179 180 da = (t_d64)(a & A_MASK); 181 db = (t_d64)(a >> A_BITS); 182 183 MUL_U32_S64_D_8(0); 184 MUL_U32_S64_D_8(4); 185 ADD_S64_U32_D_8(0); 186 ADD_S64_U32_D_8(8); 187 188 return uc; 189 190 } else { 191 DEF_VARS(2 * BUFF_SIZE); 192 t_d64 d0, d1, db; 193 t_u32 i, uc = 0; 194 195 da = (t_d64)(a & A_MASK); 196 db = (t_d64)(a >> A_BITS); 197 198 #pragma pipeloop(0) 199 for (i = 0; i < (n + 1) / 2; i++) { 200 MUL_U32_S64_2_D(i); 201 } 202 203 #pragma pipeloop(0) 204 for (i = 0; i < n; i++) { 205 ADD_S64_U32_D(i); 206 } 207 208 return uc; 209 } 210 } 211 } 212 213 /***************************************************************/ 214 215 t_u32 216 mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a) 217 { 218 return mul_add(x, x, y, n, a); 219 } 220 221 /***************************************************************/