neon.S (7496B)
/*
 * This file is part of FFmpeg.
 *
 * Copyright (c) 2023 J. Dekker <jdek@itanimul.li>
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

// AArch64 (GNU as syntax) NEON helper macros: a signed clamp and a family of
// in-register transposes built from TRN1/TRN2.
//
// Conventions shared by every transpose_* macro below:
//   - Arguments are bare vector register names (e.g. v0); each macro appends
//     the arrangement specifier (.8b, .8h, ...) itself.
//   - The row registers are overwritten with the result.
//   - The trailing scratch arguments are clobbered. They are written while
//     the rows are still being read, so pass registers that do not overlap
//     the row registers.

// clip min, max, regs...
//
// Clamp each operand listed in regs to [min, max] using signed compares:
// reg = smin(smax(reg, min), max).
//
// Operands are used verbatim, so each one must already carry its arrangement
// specifier (e.g. v0.8h). All smax instructions are emitted before the first
// smin, i.e. the two passes are not interleaved per register.
.macro clip min, max, regs:vararg
.irp x, \regs
        smax            \x, \x, \min
.endr
.irp x, \regs
        smin            \x, \x, \max
.endr
.endm

// transpose_8x8B r0..r7, r8, r9
//
// Transpose an 8x8 matrix of bytes held in the low 64 bits (.8b) of r0-r7,
// one row per register. On exit register r<i> holds column i of the input.
// r8 and r9 are scratch.
.macro  transpose_8x8B  r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
        // Stage 1: interleave bytes of adjacent row pairs.
        trn1            \r8\().8b,  \r0\().8b,  \r1\().8b
        trn2            \r9\().8b,  \r0\().8b,  \r1\().8b
        trn1            \r1\().8b,  \r2\().8b,  \r3\().8b
        trn2            \r3\().8b,  \r2\().8b,  \r3\().8b
        trn1            \r0\().8b,  \r4\().8b,  \r5\().8b
        trn2            \r5\().8b,  \r4\().8b,  \r5\().8b
        trn1            \r2\().8b,  \r6\().8b,  \r7\().8b
        trn2            \r7\().8b,  \r6\().8b,  \r7\().8b

        // Stage 2: interleave 16-bit pairs, giving 4-row column fragments.
        trn1            \r4\().4h,  \r0\().4h,  \r2\().4h
        trn2            \r2\().4h,  \r0\().4h,  \r2\().4h
        trn1            \r6\().4h,  \r5\().4h,  \r7\().4h
        trn2            \r7\().4h,  \r5\().4h,  \r7\().4h
        trn1            \r5\().4h,  \r9\().4h,  \r3\().4h
        trn2            \r9\().4h,  \r9\().4h,  \r3\().4h
        trn1            \r3\().4h,  \r8\().4h,  \r1\().4h
        trn2            \r8\().4h,  \r8\().4h,  \r1\().4h

        // Stage 3: join the rows 0-3 and rows 4-7 halves of every column.
        trn1            \r0\().2s,  \r3\().2s,  \r4\().2s
        trn2            \r4\().2s,  \r3\().2s,  \r4\().2s

        trn1            \r1\().2s,  \r5\().2s,  \r6\().2s
        trn2            \r5\().2s,  \r5\().2s,  \r6\().2s

        // trn2 first here: r2 is still needed as a source for column 6.
        trn2            \r6\().2s,  \r8\().2s,  \r2\().2s
        trn1            \r2\().2s,  \r8\().2s,  \r2\().2s

        trn1            \r3\().2s,  \r9\().2s,  \r7\().2s
        trn2            \r7\().2s,  \r9\().2s,  \r7\().2s
.endm

// transpose_8x16B r0..r7, t0, t1
//
// Same three-stage network as transpose_8x8B, applied to full 128-bit
// registers (.16b/.8h/.4s). The widest exchange is between adjacent 32-bit
// lanes, so each 64-bit half is transposed independently: on exit the low
// half of r<i> holds byte i of rows 0-7 and the high half holds byte 8+i of
// rows 0-7. t0 and t1 are scratch.
.macro  transpose_8x16B r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
        // Stage 1: interleave bytes of adjacent row pairs.
        trn1            \t0\().16b, \r0\().16b, \r1\().16b
        trn2            \t1\().16b, \r0\().16b, \r1\().16b
        trn1            \r1\().16b, \r2\().16b, \r3\().16b
        trn2            \r3\().16b, \r2\().16b, \r3\().16b
        trn1            \r0\().16b, \r4\().16b, \r5\().16b
        trn2            \r5\().16b, \r4\().16b, \r5\().16b
        trn1            \r2\().16b, \r6\().16b, \r7\().16b
        trn2            \r7\().16b, \r6\().16b, \r7\().16b

        // Stage 2: interleave 16-bit pairs.
        trn1            \r4\().8h,  \r0\().8h,  \r2\().8h
        trn2            \r2\().8h,  \r0\().8h,  \r2\().8h
        trn1            \r6\().8h,  \r5\().8h,  \r7\().8h
        trn2            \r7\().8h,  \r5\().8h,  \r7\().8h
        trn1            \r5\().8h,  \t1\().8h,  \r3\().8h
        trn2            \t1\().8h,  \t1\().8h,  \r3\().8h
        trn1            \r3\().8h,  \t0\().8h,  \r1\().8h
        trn2            \t0\().8h,  \t0\().8h,  \r1\().8h

        // Stage 3: interleave 32-bit groups.
        trn1            \r0\().4s,  \r3\().4s,  \r4\().4s
        trn2            \r4\().4s,  \r3\().4s,  \r4\().4s

        trn1            \r1\().4s,  \r5\().4s,  \r6\().4s
        trn2            \r5\().4s,  \r5\().4s,  \r6\().4s

        // trn2 first here: r2 is still needed as a source for r6.
        trn2            \r6\().4s,  \t0\().4s,  \r2\().4s
        trn1            \r2\().4s,  \t0\().4s,  \r2\().4s

        trn1            \r3\().4s,  \t1\().4s,  \r7\().4s
        trn2            \r7\().4s,  \t1\().4s,  \r7\().4s
.endm

// transpose_4x16B r0..r3, t4..t7
//
// Transpose 4x4 byte blocks across four 128-bit rows. Each group of four
// consecutive bytes (one 32-bit lane) is handled independently: on exit, lane
// k of r<i> holds byte 4k+i of rows 0-3. t4-t7 are scratch.
.macro  transpose_4x16B r0, r1, r2, r3, t4, t5, t6, t7
        // Stage 1: interleave bytes of adjacent row pairs into the scratch set.
        trn1            \t4\().16b, \r0\().16b, \r1\().16b
        trn2            \t5\().16b, \r0\().16b, \r1\().16b
        trn1            \t6\().16b, \r2\().16b, \r3\().16b
        trn2            \t7\().16b, \r2\().16b, \r3\().16b

        // Stage 2: interleave 16-bit pairs back into the row registers.
        trn1            \r0\().8h,  \t4\().8h,  \t6\().8h
        trn2            \r2\().8h,  \t4\().8h,  \t6\().8h
        trn1            \r1\().8h,  \t5\().8h,  \t7\().8h
        trn2            \r3\().8h,  \t5\().8h,  \t7\().8h
.endm

// transpose_4x8B r0..r3, t4..t7
//
// 64-bit (.8b) variant of transpose_4x16B: two independent 4x4 byte blocks.
// On exit, 32-bit lane k of r<i> holds byte 4k+i of rows 0-3.
// t4-t7 are scratch.
.macro  transpose_4x8B  r0, r1, r2, r3, t4, t5, t6, t7
        // Stage 1: interleave bytes of adjacent row pairs into the scratch set.
        trn1            \t4\().8b,  \r0\().8b,  \r1\().8b
        trn2            \t5\().8b,  \r0\().8b,  \r1\().8b
        trn1            \t6\().8b,  \r2\().8b,  \r3\().8b
        trn2            \t7\().8b,  \r2\().8b,  \r3\().8b

        // Stage 2: interleave 16-bit pairs back into the row registers.
        trn1            \r0\().4h,  \t4\().4h,  \t6\().4h
        trn2            \r2\().4h,  \t4\().4h,  \t6\().4h
        trn1            \r1\().4h,  \t5\().4h,  \t7\().4h
        trn2            \r3\().4h,  \t5\().4h,  \t7\().4h
.endm

// transpose_4x4H r0..r3, r4..r7
//
// Transpose a 4x4 matrix of 16-bit elements held in the low 64 bits (.4h) of
// r0-r3. On exit r<i> holds column i of the input. r4-r7 are scratch.
.macro  transpose_4x4H  r0, r1, r2, r3, r4, r5, r6, r7
        // Stage 1: interleave halfwords of adjacent row pairs into scratch.
        trn1            \r4\().4h,  \r0\().4h,  \r1\().4h
        trn2            \r5\().4h,  \r0\().4h,  \r1\().4h
        trn1            \r6\().4h,  \r2\().4h,  \r3\().4h
        trn2            \r7\().4h,  \r2\().4h,  \r3\().4h

        // Stage 2: interleave 32-bit pairs back into the row registers.
        trn1            \r0\().2s,  \r4\().2s,  \r6\().2s
        trn2            \r2\().2s,  \r4\().2s,  \r6\().2s
        trn1            \r1\().2s,  \r5\().2s,  \r7\().2s
        trn2            \r3\().2s,  \r5\().2s,  \r7\().2s
.endm

// transpose_4x8H r0..r3, t4..t7
//
// 128-bit (.8h) variant of transpose_4x4H: the two 64-bit halves are
// transposed independently as 4x4 halfword blocks. On exit the low half of
// r<i> holds element i of rows 0-3 and the high half holds element 4+i of
// rows 0-3. t4-t7 are scratch.
.macro  transpose_4x8H  r0, r1, r2, r3, t4, t5, t6, t7
        // Stage 1: interleave halfwords of adjacent row pairs into scratch.
        trn1            \t4\().8h,  \r0\().8h,  \r1\().8h
        trn2            \t5\().8h,  \r0\().8h,  \r1\().8h
        trn1            \t6\().8h,  \r2\().8h,  \r3\().8h
        trn2            \t7\().8h,  \r2\().8h,  \r3\().8h

        // Stage 2: interleave 32-bit pairs back into the row registers.
        trn1            \r0\().4s,  \t4\().4s,  \t6\().4s
        trn2            \r2\().4s,  \t4\().4s,  \t6\().4s
        trn1            \r1\().4s,  \t5\().4s,  \t7\().4s
        trn2            \r3\().4s,  \t5\().4s,  \t7\().4s
.endm

// transpose_8x8H r0..r7, r8, r9
//
// Transpose an 8x8 matrix of 16-bit elements held in the full 128 bits (.8h)
// of r0-r7, one row per register. On exit r<i> holds column i of the input.
// r8 and r9 are scratch.
.macro  transpose_8x8H  r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
        // Stage 1: interleave halfwords of adjacent row pairs.
        trn1            \r8\().8h,  \r0\().8h,  \r1\().8h
        trn2            \r9\().8h,  \r0\().8h,  \r1\().8h
        trn1            \r1\().8h,  \r2\().8h,  \r3\().8h
        trn2            \r3\().8h,  \r2\().8h,  \r3\().8h
        trn1            \r0\().8h,  \r4\().8h,  \r5\().8h
        trn2            \r5\().8h,  \r4\().8h,  \r5\().8h
        trn1            \r2\().8h,  \r6\().8h,  \r7\().8h
        trn2            \r7\().8h,  \r6\().8h,  \r7\().8h

        // Stage 2: interleave 32-bit pairs, giving 4-row column fragments.
        trn1            \r4\().4s,  \r0\().4s,  \r2\().4s
        trn2            \r2\().4s,  \r0\().4s,  \r2\().4s
        trn1            \r6\().4s,  \r5\().4s,  \r7\().4s
        trn2            \r7\().4s,  \r5\().4s,  \r7\().4s
        trn1            \r5\().4s,  \r9\().4s,  \r3\().4s
        trn2            \r9\().4s,  \r9\().4s,  \r3\().4s
        trn1            \r3\().4s,  \r8\().4s,  \r1\().4s
        trn2            \r8\().4s,  \r8\().4s,  \r1\().4s

        // Stage 3: join the rows 0-3 and rows 4-7 halves of every column.
        trn1            \r0\().2d,  \r3\().2d,  \r4\().2d
        trn2            \r4\().2d,  \r3\().2d,  \r4\().2d

        trn1            \r1\().2d,  \r5\().2d,  \r6\().2d
        trn2            \r5\().2d,  \r5\().2d,  \r6\().2d

        // trn2 first here: r2 is still needed as a source for column 6.
        trn2            \r6\().2d,  \r8\().2d,  \r2\().2d
        trn1            \r2\().2d,  \r8\().2d,  \r2\().2d

        trn1            \r3\().2d,  \r9\().2d,  \r7\().2d
        trn2            \r7\().2d,  \r9\().2d,  \r7\().2d

.endm