asm.S (10910B)
/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Janne Grunau
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Shared preamble for the Arm (32-bit) and AArch64 assembly sources.
 *
 * This header is run through the C preprocessor. It provides:
 *  - per-extension ENABLE_x / DISABLE_x directives (AArch64),
 *  - pointer-authentication and BTI landing-pad macros (AArch64),
 *  - the .note.gnu.property section describing those features (AArch64 ELF),
 *  - assembler mode setup and the A / T line prefixes (32-bit ARM),
 *  - the function / const / jumptable macros and the L() / X() name helpers.
 */

#ifndef DAV1D_SRC_ARM_ASM_S
#define DAV1D_SRC_ARM_ASM_S

#include "config.h"

#if ARCH_AARCH64
/* Any mention of x18/w18 is rewritten to an obviously invalid name. */
#define x18 do_not_use_x18
#define w18 do_not_use_w18

#if HAVE_AS_ARCH_DIRECTIVE
        .arch AS_ARCH_LEVEL
#endif

/* Each ENABLE_x / DISABLE_x pair expands to the matching .arch_extension
 * directive when the corresponding HAVE_AS_ARCHEXT_x_DIRECTIVE test is true,
 * and to nothing otherwise. */
#if HAVE_AS_ARCHEXT_DOTPROD_DIRECTIVE
#define ENABLE_DOTPROD  .arch_extension dotprod
#define DISABLE_DOTPROD .arch_extension nodotprod
#else
#define ENABLE_DOTPROD
#define DISABLE_DOTPROD
#endif
#if HAVE_AS_ARCHEXT_I8MM_DIRECTIVE
#define ENABLE_I8MM  .arch_extension i8mm
#define DISABLE_I8MM .arch_extension noi8mm
#else
#define ENABLE_I8MM
#define DISABLE_I8MM
#endif
#if HAVE_AS_ARCHEXT_SVE_DIRECTIVE
#define ENABLE_SVE  .arch_extension sve
#define DISABLE_SVE .arch_extension nosve
#else
#define ENABLE_SVE
#define DISABLE_SVE
#endif
#if HAVE_AS_ARCHEXT_SVE2_DIRECTIVE
#define ENABLE_SVE2  .arch_extension sve2
#define DISABLE_SVE2 .arch_extension nosve2
#else
#define ENABLE_SVE2
#define DISABLE_SVE2
#endif

/* If we do support the .arch_extension directives, disable support for all
 * the extensions that we may use, in case they were implicitly enabled by
 * the .arch level. This makes it clear if we try to assemble an instruction
 * from an unintended extension set; we only allow assembling such instructions
 * within regions where we explicitly enable those extensions. */
DISABLE_DOTPROD
DISABLE_I8MM
DISABLE_SVE
DISABLE_SVE2


/* Support macros for
 *   - Armv8.3-A Pointer Authentication and
 *   - Armv8.5-A Branch Target Identification
 * features which require emitting a .note.gnu.property section with the
 * appropriate architecture-dependent feature bits set.
 *
 * |AARCH64_SIGN_LINK_REGISTER| and |AARCH64_VALIDATE_LINK_REGISTER| expand to
 * PACIxSP and AUTIxSP, respectively. |AARCH64_SIGN_LINK_REGISTER| should be
 * used immediately before saving the LR register (x30) to the stack.
 * |AARCH64_VALIDATE_LINK_REGISTER| should be used immediately after restoring
 * it. Note |AARCH64_SIGN_LINK_REGISTER|'s modifications to LR must be undone
 * with |AARCH64_VALIDATE_LINK_REGISTER| before RET. The SP register must also
 * have the same value at the two points. For example:
 *
 *   .global f
 *   f:
 *     AARCH64_SIGN_LINK_REGISTER
 *     stp x29, x30, [sp, #-96]!
 *     mov x29, sp
 *     ...
 *     ldp x29, x30, [sp], #96
 *     AARCH64_VALIDATE_LINK_REGISTER
 *     ret
 *
 * |AARCH64_VALID_CALL_TARGET| expands to BTI 'c'. Either it, or
 * |AARCH64_SIGN_LINK_REGISTER|, must be used at every point that may be an
 * indirect call target. In particular, all symbols exported from a file must
 * begin with one of these macros. For example, a leaf function that does not
 * save LR can instead use |AARCH64_VALID_CALL_TARGET|:
 *
 *   .globl return_zero
 *   return_zero:
 *     AARCH64_VALID_CALL_TARGET
 *     mov x0, #0
 *     ret
 *
 * A non-leaf function which does not immediately save LR may need both macros
 * because |AARCH64_SIGN_LINK_REGISTER| appears late. For example, the function
 * may jump to an alternate implementation before setting up the stack:
 *
 *   .globl with_early_jump
 *   with_early_jump:
 *     AARCH64_VALID_CALL_TARGET
 *     cmp x0, #128
 *     b.lt .Lwith_early_jump_128
 *     AARCH64_SIGN_LINK_REGISTER
 *     stp x29, x30, [sp, #-96]!
 *     mov x29, sp
 *     ...
 *     ldp x29, x30, [sp], #96
 *     AARCH64_VALIDATE_LINK_REGISTER
 *     ret
 *
 *  .Lwith_early_jump_128:
 *     ...
 *     ret
 *
 * These annotations are only required with indirect calls. Private symbols that
 * are only the target of direct calls do not require annotations. Also note
 * that |AARCH64_VALID_CALL_TARGET| is only valid for indirect calls (BLR), not
 * indirect jumps (BR). Indirect jumps in assembly are supported through
 * |AARCH64_VALID_JUMP_TARGET|. Landing Pads which shall serve for jumps and
 * calls can be created using |AARCH64_VALID_JUMP_CALL_TARGET|.
 *
 * Although not necessary, it is safe to use these macros in 32-bit ARM
 * assembly. This may be used to simplify dual 32-bit and 64-bit files.
 * NOTE(review): in this header the macros below are defined only inside the
 * ARCH_AARCH64 block, so as written they are not available to ARCH_ARM
 * sources - confirm whether the sentence above is still intended.
 *
 * References:
 * - "ELF for the Arm® 64-bit Architecture"
 *   https://github.com/ARM-software/abi-aa/blob/master/aaelf64/aaelf64.rst
 * - "Providing protection for complex software"
 *   https://developer.arm.com/architectures/learn-the-architecture/providing-protection-for-complex-software
 */
#if defined(__ARM_FEATURE_BTI_DEFAULT) && (__ARM_FEATURE_BTI_DEFAULT == 1)
#define GNU_PROPERTY_AARCH64_BTI (1 << 0)   // Has Branch Target Identification
#define AARCH64_VALID_JUMP_CALL_TARGET hint #38  // BTI 'jc'
#define AARCH64_VALID_CALL_TARGET      hint #34  // BTI 'c'
#define AARCH64_VALID_JUMP_TARGET      hint #36  // BTI 'j'
#else
#define GNU_PROPERTY_AARCH64_BTI 0          // No Branch Target Identification
#define AARCH64_VALID_JUMP_CALL_TARGET
#define AARCH64_VALID_CALL_TARGET
#define AARCH64_VALID_JUMP_TARGET
#endif

/* Select the link-register signing instructions. Three cases:
 *  - the compiler advertises a default PAC configuration: use key A or key B
 *    as indicated and set the PAC bit for the GNU property note,
 *  - Apple arm64e without that macro: sign with key B, no property bit,
 *  - otherwise: the signing macros expand to nothing. */
#if defined(__ARM_FEATURE_PAC_DEFAULT)

#if ((__ARM_FEATURE_PAC_DEFAULT & (1 << 0)) != 0) // authentication using key A
#define AARCH64_SIGN_LINK_REGISTER      paciasp
#define AARCH64_VALIDATE_LINK_REGISTER  autiasp
#elif ((__ARM_FEATURE_PAC_DEFAULT & (1 << 1)) != 0) // authentication using key B
#define AARCH64_SIGN_LINK_REGISTER      pacibsp
#define AARCH64_VALIDATE_LINK_REGISTER  autibsp
#else
#error Pointer authentication defines no valid key!
#endif
#if ((__ARM_FEATURE_PAC_DEFAULT & (1 << 2)) != 0) // authentication of leaf functions
#error Authentication of leaf functions is enabled but not supported in dav1d!
#endif
#define GNU_PROPERTY_AARCH64_PAC (1 << 1)

#elif defined(__APPLE__) && defined(__arm64e__)

#define GNU_PROPERTY_AARCH64_PAC 0
#define AARCH64_SIGN_LINK_REGISTER      pacibsp
#define AARCH64_VALIDATE_LINK_REGISTER  autibsp

#else /* __ARM_FEATURE_PAC_DEFAULT */

#define GNU_PROPERTY_AARCH64_PAC 0
#define AARCH64_SIGN_LINK_REGISTER
#define AARCH64_VALIDATE_LINK_REGISTER

#endif /* !__ARM_FEATURE_PAC_DEFAULT */


/* Emit the GNU property note only on ELF and only when at least one of the
 * BTI / PAC feature bits is set. */
#if (GNU_PROPERTY_AARCH64_BTI != 0 || GNU_PROPERTY_AARCH64_PAC != 0) && defined(__ELF__)
        .pushsection .note.gnu.property, "a"
        .balign 8
        .long 4                 /* note name size: "GNU" plus NUL */
        .long 0x10              /* note descriptor size: the four words below */
        .long 0x5               /* note type (NT_GNU_PROPERTY_TYPE_0 per the ELF reference above) */
        .asciz "GNU"
        .long 0xc0000000 /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */
        .long 4                 /* property data size */
        .long (GNU_PROPERTY_AARCH64_BTI | GNU_PROPERTY_AARCH64_PAC)
        .long 0                 /* padding word */
        .popsection
#endif /* (GNU_PROPERTY_AARCH64_BTI != 0 || GNU_PROPERTY_AARCH64_PAC != 0) && defined(__ELF__) */
#endif /* ARCH_AARCH64 */

#if ARCH_ARM
        .syntax unified
#ifdef __ELF__
        .arch armv7-a
        .fpu neon
        .eabi_attribute 10, 0           // suppress Tag_FP_arch
        .eabi_attribute 12, 0           // suppress Tag_Advanced_SIMD_arch
        .section .note.GNU-stack,"",%progbits // Mark stack as non-executable
#endif /* __ELF__ */

/* Thumb mode is selected for Windows targets only. */
#ifdef _WIN32
#define CONFIG_THUMB 1
#else
#define CONFIG_THUMB 0
#endif

/* A and T are line prefixes for instructions that differ between ARM and
 * Thumb mode: the prefix for the inactive mode expands to '@', the prefix for
 * the active mode expands to nothing. */
#if CONFIG_THUMB
        .thumb
#define A @
#define T
#else
#define A
#define T @
#endif /* CONFIG_THUMB */
#endif /* ARCH_ARM */

/* Derive PIC from the compiler-provided __PIC__ / __pic__ unless the build
 * already defined it. */
#if !defined(PIC)
#if defined(__PIC__)
#define PIC __PIC__
#elif defined(__pic__)
#define PIC __pic__
#endif
#endif

#ifndef PRIVATE_PREFIX
#define PRIVATE_PREFIX dav1d_
#endif

/* Two-level paste so that macro arguments are expanded before being joined. */
#define PASTE(a,b) a ## b
#define CONCAT(a,b) PASTE(a,b)

/* EXTERN is the prefix put in front of exported symbol names: PRIVATE_PREFIX,
 * with an extra leading underscore when PREFIX is defined. */
#ifdef PREFIX
#define EXTERN CONCAT(_,PRIVATE_PREFIX)
#else
#define EXTERN PRIVATE_PREFIX
#endif

/*
 * function name, export=0, align=2
 *
 * Opens a function in the .text section and defines a matching one-shot
 * "endfunc" macro that must be used to close it.
 *
 *   name   - label emitted for the function; always defined.
 *   export - when non-zero, an additional global label EXTERN<name> is
 *            emitted (hidden on ELF, private_extern on Mach-O), and on
 *            AArch64 the body starts with AARCH64_VALID_CALL_TARGET.
 *   align  - operand passed to .align.
 *
 * endfunc emits .size for <name> on ELF, closes .func when HAVE_AS_FUNC is
 * set, and then purges itself so the next "function" can define it again.
 */
.macro function name, export=0, align=2
    .macro endfunc
#ifdef __ELF__
        .size   \name, . - \name
#endif
#if HAVE_AS_FUNC
        .endfunc
#endif
        .purgem endfunc
    .endm
        .text
        .align \align
    .if \export
        .global EXTERN\name
#ifdef __ELF__
        .type   EXTERN\name, %function
        .hidden EXTERN\name
#elif defined(__MACH__)
        .private_extern EXTERN\name
#endif
#if HAVE_AS_FUNC
        .func   EXTERN\name
#endif
EXTERN\name:
    .else
#ifdef __ELF__
        .type \name, %function
#endif
#if HAVE_AS_FUNC
        .func   \name
#endif
    .endif
\name:
#if ARCH_AARCH64
    .if \export
         AARCH64_VALID_CALL_TARGET
    .endif
#endif
.endm

/*
 * const name, export=0, align=2
 *
 * Opens a constant data object and defines a matching one-shot "endconst"
 * macro that must be used to close it. The data is placed in .rdata on
 * Windows, .const_data on Mach-O and .rodata elsewhere.
 *
 *   name   - label emitted for the object; always defined.
 *   export - when non-zero, an additional global label EXTERN<name> is
 *            emitted (hidden on ELF, private_extern on Mach-O).
 *   align  - operand passed to .align.
 *
 * endconst emits .size for <name> on ELF and then purges itself.
 */
.macro const name, export=0, align=2
    .macro endconst
#ifdef __ELF__
        .size   \name, . - \name
#endif
        .purgem endconst
    .endm
#if defined(_WIN32)
        .section        .rdata
#elif !defined(__MACH__)
        .section        .rodata
#else
        .const_data
#endif
        .align          \align
    .if \export
        .global EXTERN\name
#ifdef __ELF__
        .hidden EXTERN\name
#elif defined(__MACH__)
        .private_extern EXTERN\name
#endif
EXTERN\name:
    .endif
\name:
.endm

/*
 * jumptable name
 *
 * Opens a jump table called <name>; close it with "endjumptable". Expands to
 * "function" on Windows and to "const" elsewhere, for the reasons given in
 * the comments inside the macro.
 */
.macro jumptable name
#ifdef _WIN32
// MS armasm64 doesn't seem to be able to create relocations for subtraction
// of labels in different sections; for armasm64 (and all of Windows for
// simplicity), write the jump table in the text section, to allow calculating
// differences at assembly time. See
// https://developercommunity.visualstudio.com/t/armasm64-unable-to-create-cross-section/10722340
// for reference. (LLVM can create such relocations, but checking for _WIN32
// for simplicity, as execute-only memory isn't relevant on Windows at the
// moment.)
        function \name
#else
// For other platforms, write jump tables in a const data section, to allow
// working in environments where executable memory isn't readable.
        const \name
#endif
.endm

/* Closes a table opened with "jumptable", using the terminator that matches
 * the macro "jumptable" expanded to. */
.macro endjumptable
#ifdef _WIN32
        endfunc
#else
        endconst
#endif
.endm

/* L(x) builds a local label name: prefix "L" on Apple targets, ".L"
 * elsewhere. */
#ifdef __APPLE__
#define L(x) L ## x
#else
#define L(x) .L ## x
#endif

/* X(x) builds the exported symbol name for x by prepending EXTERN. */
#define X(x) CONCAT(EXTERN, x)


#endif /* DAV1D_SRC_ARM_ASM_S */