copyvertex.inc.h (23236B)
1 // 2 // Copyright 2014 The ANGLE Project Authors. All rights reserved. 3 // Use of this source code is governed by a BSD-style license that can be 4 // found in the LICENSE file. 5 // 6 7 // copyvertex.inc.h: Implementation of vertex buffer copying and conversion functions 8 9 namespace rx 10 { 11 12 // Returns an aligned buffer to read the input from 13 template <typename T, size_t inputComponentCount> 14 inline const T *GetAlignedOffsetInput(const T *offsetInput, T *alignedElement) 15 { 16 if (reinterpret_cast<uintptr_t>(offsetInput) % sizeof(T) != 0) 17 { 18 // Applications may pass in arbitrarily aligned buffers as input. 19 // Certain architectures have restrictions regarding unaligned reads. Specifically, we crash 20 // on armeabi-v7a devices with a SIGBUS error when performing such operations. arm64 and 21 // x86-64 devices do not appear to have such issues. 22 // 23 // The workaround is to detect if the input buffer is unaligned and if so, perform a 24 // byte-wise copy of the unaligned portion and a memcpy of the rest of the buffer. 25 uint8_t *alignedBuffer = reinterpret_cast<uint8_t *>(&alignedElement[0]); 26 uintptr_t unalignedInputStartAddress = reinterpret_cast<uintptr_t>(offsetInput); 27 constexpr size_t kAlignmentMinusOne = sizeof(T) - 1; 28 uintptr_t alignedInputStartAddress = 29 (reinterpret_cast<uintptr_t>(offsetInput) + kAlignmentMinusOne) & ~(kAlignmentMinusOne); 30 ASSERT(alignedInputStartAddress >= unalignedInputStartAddress); 31 32 const size_t totalBytesToCopy = sizeof(T) * inputComponentCount; 33 const size_t unalignedBytesToCopy = alignedInputStartAddress - unalignedInputStartAddress; 34 ASSERT(totalBytesToCopy >= unalignedBytesToCopy); 35 36 // byte-wise copy of unaligned portion 37 for (size_t i = 0; i < unalignedBytesToCopy; i++) 38 { 39 alignedBuffer[i] = reinterpret_cast<const uint8_t *>(&offsetInput[0])[i]; 40 } 41 42 // memcpy remaining buffer 43 memcpy(&alignedBuffer[unalignedBytesToCopy], 44 &reinterpret_cast<const uint8_t *>(&offsetInput[0])[unalignedBytesToCopy], 45 totalBytesToCopy - unalignedBytesToCopy); 46 47 return alignedElement; 48 } 49 else 50 { 51 return offsetInput; 52 } 53 } 54 55 template <typename T, 56 size_t inputComponentCount, 57 size_t outputComponentCount, 58 uint32_t alphaDefaultValueBits> 59 inline void CopyNativeVertexData(const uint8_t *input, size_t stride, size_t count, uint8_t *output) 60 { 61 const size_t attribSize = sizeof(T) * inputComponentCount; 62 63 if (attribSize == stride && inputComponentCount == outputComponentCount) 64 { 65 memcpy(output, input, count * attribSize); 66 return; 67 } 68 69 if (inputComponentCount == outputComponentCount) 70 { 71 for (size_t i = 0; i < count; i++) 72 { 73 const T *offsetInput = reinterpret_cast<const T *>(input + (i * stride)); 74 T offsetInputAligned[inputComponentCount]; 75 offsetInput = 76 GetAlignedOffsetInput<T, inputComponentCount>(offsetInput, &offsetInputAligned[0]); 77 78 T *offsetOutput = reinterpret_cast<T *>(output) + i * outputComponentCount; 79 80 memcpy(offsetOutput, offsetInput, attribSize); 81 } 82 return; 83 } 84 85 const T defaultAlphaValue = gl::bitCast<T>(alphaDefaultValueBits); 86 const size_t lastNonAlphaOutputComponent = std::min<size_t>(outputComponentCount, 3); 87 88 for (size_t i = 0; i < count; i++) 89 { 90 const T *offsetInput = reinterpret_cast<const T *>(input + (i * stride)); 91 T offsetInputAligned[inputComponentCount]; 92 ASSERT(sizeof(offsetInputAligned) == attribSize); 93 offsetInput = 94 GetAlignedOffsetInput<T, inputComponentCount>(offsetInput, &offsetInputAligned[0]); 95 96 T *offsetOutput = reinterpret_cast<T *>(output) + i * outputComponentCount; 97 98 memcpy(offsetOutput, offsetInput, attribSize); 99 100 if (inputComponentCount < lastNonAlphaOutputComponent) 101 { 102 // Set the remaining G/B channels to 0. 103 size_t numComponents = (lastNonAlphaOutputComponent - inputComponentCount); 104 memset(&offsetOutput[inputComponentCount], 0, numComponents * sizeof(T)); 105 } 106 107 if (inputComponentCount < outputComponentCount && outputComponentCount == 4) 108 { 109 // Set the remaining alpha channel to the defaultAlphaValue. 110 offsetOutput[3] = defaultAlphaValue; 111 } 112 } 113 } 114 115 template <size_t inputComponentCount, size_t outputComponentCount> 116 inline void Copy8SintTo16SintVertexData(const uint8_t *input, 117 size_t stride, 118 size_t count, 119 uint8_t *output) 120 { 121 const size_t lastNonAlphaOutputComponent = std::min<size_t>(outputComponentCount, 3); 122 123 for (size_t i = 0; i < count; i++) 124 { 125 const GLbyte *offsetInput = reinterpret_cast<const GLbyte *>(input + i * stride); 126 GLshort *offsetOutput = reinterpret_cast<GLshort *>(output) + i * outputComponentCount; 127 128 for (size_t j = 0; j < inputComponentCount; j++) 129 { 130 offsetOutput[j] = static_cast<GLshort>(offsetInput[j]); 131 } 132 133 for (size_t j = inputComponentCount; j < lastNonAlphaOutputComponent; j++) 134 { 135 // Set remaining G/B channels to 0. 136 offsetOutput[j] = 0; 137 } 138 139 if (inputComponentCount < outputComponentCount && outputComponentCount == 4) 140 { 141 // On integer formats, we must set the Alpha channel to 1 if it's unused. 142 offsetOutput[3] = 1; 143 } 144 } 145 } 146 147 template <size_t inputComponentCount, size_t outputComponentCount> 148 inline void Copy8SnormTo16SnormVertexData(const uint8_t *input, 149 size_t stride, 150 size_t count, 151 uint8_t *output) 152 { 153 for (size_t i = 0; i < count; i++) 154 { 155 const GLbyte *offsetInput = reinterpret_cast<const GLbyte *>(input + i * stride); 156 GLshort *offsetOutput = reinterpret_cast<GLshort *>(output) + i * outputComponentCount; 157 158 for (size_t j = 0; j < inputComponentCount; j++) 159 { 160 // The original GLbyte value ranges from -128 to +127 (INT8_MAX). 161 // When converted to GLshort, the value must be scaled to between -32768 and +32767 162 // (INT16_MAX). 163 if (offsetInput[j] > 0) 164 { 165 offsetOutput[j] = 166 offsetInput[j] << 8 | offsetInput[j] << 1 | ((offsetInput[j] & 0x40) >> 6); 167 } 168 else 169 { 170 offsetOutput[j] = offsetInput[j] << 8; 171 } 172 } 173 174 for (size_t j = inputComponentCount; j < std::min<size_t>(outputComponentCount, 3); j++) 175 { 176 // Set remaining G/B channels to 0. 177 offsetOutput[j] = 0; 178 } 179 180 if (inputComponentCount < outputComponentCount && outputComponentCount == 4) 181 { 182 // On normalized formats, we must set the Alpha channel to the max value if it's unused. 183 offsetOutput[3] = INT16_MAX; 184 } 185 } 186 } 187 188 template <size_t inputComponentCount, size_t outputComponentCount> 189 inline void Copy32FixedTo32FVertexData(const uint8_t *input, 190 size_t stride, 191 size_t count, 192 uint8_t *output) 193 { 194 static const float divisor = 1.0f / (1 << 16); 195 196 for (size_t i = 0; i < count; i++) 197 { 198 const uint8_t *offsetInput = input + i * stride; 199 float *offsetOutput = reinterpret_cast<float *>(output) + i * outputComponentCount; 200 201 // GLfixed access must be 4-byte aligned on arm32, input and stride sometimes are not 202 if (reinterpret_cast<uintptr_t>(offsetInput) % sizeof(GLfixed) == 0) 203 { 204 for (size_t j = 0; j < inputComponentCount; j++) 205 { 206 offsetOutput[j] = 207 static_cast<float>(reinterpret_cast<const GLfixed *>(offsetInput)[j]) * divisor; 208 } 209 } 210 else 211 { 212 for (size_t j = 0; j < inputComponentCount; j++) 213 { 214 GLfixed alignedInput; 215 memcpy(&alignedInput, offsetInput + j * sizeof(GLfixed), sizeof(GLfixed)); 216 offsetOutput[j] = static_cast<float>(alignedInput) * divisor; 217 } 218 } 219 220 // 4-component output formats would need special padding in the alpha channel. 221 static_assert(!(inputComponentCount < 4 && outputComponentCount == 4), 222 "An inputComponentCount less than 4 and an outputComponentCount equal to 4 " 223 "is not supported."); 224 225 for (size_t j = inputComponentCount; j < outputComponentCount; j++) 226 { 227 offsetOutput[j] = 0.0f; 228 } 229 } 230 } 231 232 template <typename T, 233 size_t inputComponentCount, 234 size_t outputComponentCount, 235 bool normalized, 236 bool toHalf> 237 inline void CopyToFloatVertexData(const uint8_t *input, 238 size_t stride, 239 size_t count, 240 uint8_t *output) 241 { 242 typedef std::numeric_limits<T> NL; 243 typedef typename std::conditional<toHalf, GLhalf, float>::type outputType; 244 245 for (size_t i = 0; i < count; i++) 246 { 247 const T *offsetInput = reinterpret_cast<const T *>(input + (stride * i)); 248 outputType *offsetOutput = 249 reinterpret_cast<outputType *>(output) + i * outputComponentCount; 250 251 T offsetInputAligned[inputComponentCount]; 252 offsetInput = 253 GetAlignedOffsetInput<T, inputComponentCount>(offsetInput, &offsetInputAligned[0]); 254 255 for (size_t j = 0; j < inputComponentCount; j++) 256 { 257 float result = 0; 258 259 if (normalized) 260 { 261 if (NL::is_signed) 262 { 263 result = static_cast<float>(offsetInput[j]) / static_cast<float>(NL::max()); 264 result = result >= -1.0f ? result : -1.0f; 265 } 266 else 267 { 268 result = static_cast<float>(offsetInput[j]) / static_cast<float>(NL::max()); 269 } 270 } 271 else 272 { 273 result = static_cast<float>(offsetInput[j]); 274 } 275 276 if (toHalf) 277 { 278 offsetOutput[j] = gl::float32ToFloat16(result); 279 } 280 else 281 { 282 offsetOutput[j] = static_cast<outputType>(result); 283 } 284 } 285 286 for (size_t j = inputComponentCount; j < outputComponentCount; j++) 287 { 288 offsetOutput[j] = 0; 289 } 290 291 if (inputComponentCount < 4 && outputComponentCount == 4) 292 { 293 if (toHalf) 294 { 295 offsetOutput[3] = gl::Float16One; 296 } 297 else 298 { 299 offsetOutput[3] = static_cast<outputType>(gl::Float32One); 300 } 301 } 302 } 303 } 304 305 template <size_t inputComponentCount, size_t outputComponentCount> 306 void Copy32FTo16FVertexData(const uint8_t *input, size_t stride, size_t count, uint8_t *output) 307 { 308 const unsigned short kZero = gl::float32ToFloat16(0.0f); 309 const unsigned short kOne = gl::float32ToFloat16(1.0f); 310 311 for (size_t i = 0; i < count; i++) 312 { 313 const float *offsetInput = reinterpret_cast<const float *>(input + (stride * i)); 314 unsigned short *offsetOutput = 315 reinterpret_cast<unsigned short *>(output) + i * outputComponentCount; 316 317 for (size_t j = 0; j < inputComponentCount; j++) 318 { 319 offsetOutput[j] = gl::float32ToFloat16(offsetInput[j]); 320 } 321 322 for (size_t j = inputComponentCount; j < outputComponentCount; j++) 323 { 324 offsetOutput[j] = (j == 3) ? kOne : kZero; 325 } 326 } 327 } 328 329 inline void CopyXYZ32FToXYZ9E5(const uint8_t *input, size_t stride, size_t count, uint8_t *output) 330 { 331 for (size_t i = 0; i < count; i++) 332 { 333 const float *offsetInput = reinterpret_cast<const float *>(input + (stride * i)); 334 unsigned int *offsetOutput = reinterpret_cast<unsigned int *>(output) + i; 335 336 *offsetOutput = gl::convertRGBFloatsTo999E5(offsetInput[0], offsetInput[1], offsetInput[2]); 337 } 338 } 339 340 inline void CopyXYZ32FToX11Y11B10F(const uint8_t *input, 341 size_t stride, 342 size_t count, 343 uint8_t *output) 344 { 345 for (size_t i = 0; i < count; i++) 346 { 347 const float *offsetInput = reinterpret_cast<const float *>(input + (stride * i)); 348 unsigned int *offsetOutput = reinterpret_cast<unsigned int *>(output) + i; 349 350 *offsetOutput = gl::float32ToFloat11(offsetInput[0]) << 0 | 351 gl::float32ToFloat11(offsetInput[1]) << 11 | 352 gl::float32ToFloat10(offsetInput[2]) << 22; 353 } 354 } 355 356 namespace priv 357 { 358 359 template <bool isSigned, bool normalized, bool toFloat, bool toHalf> 360 static inline void CopyPackedRGB(uint32_t data, uint8_t *output) 361 { 362 const uint32_t rgbSignMask = 0x200; // 1 set at the 9 bit 363 const uint32_t negativeMask = 0xFFFFFC00; // All bits from 10 to 31 set to 1 364 365 if (toFloat || toHalf) 366 { 367 GLfloat finalValue = static_cast<GLfloat>(data); 368 if (isSigned) 369 { 370 if (data & rgbSignMask) 371 { 372 int negativeNumber = data | negativeMask; 373 finalValue = static_cast<GLfloat>(negativeNumber); 374 } 375 376 if (normalized) 377 { 378 const int32_t maxValue = 0x1FF; // 1 set in bits 0 through 8 379 const int32_t minValue = 0xFFFFFE01; // Inverse of maxValue 380 381 // A 10-bit two's complement number has the possibility of being minValue - 1 but 382 // OpenGL's normalization rules dictate that it should be clamped to minValue in 383 // this case. 384 if (finalValue < minValue) 385 { 386 finalValue = minValue; 387 } 388 389 const int32_t halfRange = (maxValue - minValue) >> 1; 390 finalValue = ((finalValue - minValue) / halfRange) - 1.0f; 391 } 392 } 393 else 394 { 395 if (normalized) 396 { 397 const uint32_t maxValue = 0x3FF; // 1 set in bits 0 through 9 398 finalValue /= static_cast<GLfloat>(maxValue); 399 } 400 } 401 402 if (toHalf) 403 { 404 *reinterpret_cast<GLhalf *>(output) = gl::float32ToFloat16(finalValue); 405 } 406 else 407 { 408 *reinterpret_cast<GLfloat *>(output) = finalValue; 409 } 410 } 411 else 412 { 413 if (isSigned) 414 { 415 GLshort *intOutput = reinterpret_cast<GLshort *>(output); 416 417 if (data & rgbSignMask) 418 { 419 *intOutput = static_cast<GLshort>(data | negativeMask); 420 } 421 else 422 { 423 *intOutput = static_cast<GLshort>(data); 424 } 425 } 426 else 427 { 428 GLushort *uintOutput = reinterpret_cast<GLushort *>(output); 429 *uintOutput = static_cast<GLushort>(data); 430 } 431 } 432 } 433 434 template <bool isSigned, bool normalized, bool toFloat, bool toHalf> 435 inline void CopyPackedAlpha(uint32_t data, uint8_t *output) 436 { 437 ASSERT(data >= 0 && data <= 3); 438 439 if (toFloat || toHalf) 440 { 441 GLfloat finalValue = 0; 442 if (isSigned) 443 { 444 if (normalized) 445 { 446 switch (data) 447 { 448 case 0x0: 449 finalValue = 0.0f; 450 break; 451 case 0x1: 452 finalValue = 1.0f; 453 break; 454 case 0x2: 455 finalValue = -1.0f; 456 break; 457 case 0x3: 458 finalValue = -1.0f; 459 break; 460 default: 461 UNREACHABLE(); 462 } 463 } 464 else 465 { 466 switch (data) 467 { 468 case 0x0: 469 finalValue = 0.0f; 470 break; 471 case 0x1: 472 finalValue = 1.0f; 473 break; 474 case 0x2: 475 finalValue = -2.0f; 476 break; 477 case 0x3: 478 finalValue = -1.0f; 479 break; 480 default: 481 UNREACHABLE(); 482 } 483 } 484 } 485 else 486 { 487 if (normalized) 488 { 489 finalValue = data / 3.0f; 490 } 491 else 492 { 493 finalValue = static_cast<float>(data); 494 } 495 } 496 497 if (toHalf) 498 { 499 *reinterpret_cast<GLhalf *>(output) = gl::float32ToFloat16(finalValue); 500 } 501 else 502 { 503 *reinterpret_cast<GLfloat *>(output) = finalValue; 504 } 505 } 506 else 507 { 508 if (isSigned) 509 { 510 GLshort *intOutput = reinterpret_cast<GLshort *>(output); 511 switch (data) 512 { 513 case 0x0: 514 *intOutput = 0; 515 break; 516 case 0x1: 517 *intOutput = 1; 518 break; 519 case 0x2: 520 *intOutput = -2; 521 break; 522 case 0x3: 523 *intOutput = -1; 524 break; 525 default: 526 UNREACHABLE(); 527 } 528 } 529 else 530 { 531 *reinterpret_cast<GLushort *>(output) = static_cast<GLushort>(data); 532 } 533 } 534 } 535 536 } // namespace priv 537 538 template <bool isSigned, bool normalized, bool toFloat, bool toHalf> 539 inline void CopyXYZ10W2ToXYZWFloatVertexData(const uint8_t *input, 540 size_t stride, 541 size_t count, 542 uint8_t *output) 543 { 544 const size_t outputComponentSize = toFloat && !toHalf ? 4 : 2; 545 const size_t componentCount = 4; 546 547 const uint32_t rgbMask = 0x3FF; // 1 set in bits 0 through 9 548 const size_t redShift = 0; // red is bits 0 through 9 549 const size_t greenShift = 10; // green is bits 10 through 19 550 const size_t blueShift = 20; // blue is bits 20 through 29 551 552 const uint32_t alphaMask = 0x3; // 1 set in bits 0 and 1 553 const size_t alphaShift = 30; // Alpha is the 30 and 31 bits 554 555 for (size_t i = 0; i < count; i++) 556 { 557 GLuint packedValue = *reinterpret_cast<const GLuint *>(input + (i * stride)); 558 uint8_t *offsetOutput = output + (i * outputComponentSize * componentCount); 559 560 priv::CopyPackedRGB<isSigned, normalized, toFloat, toHalf>( 561 (packedValue >> redShift) & rgbMask, offsetOutput + (0 * outputComponentSize)); 562 priv::CopyPackedRGB<isSigned, normalized, toFloat, toHalf>( 563 (packedValue >> greenShift) & rgbMask, offsetOutput + (1 * outputComponentSize)); 564 priv::CopyPackedRGB<isSigned, normalized, toFloat, toHalf>( 565 (packedValue >> blueShift) & rgbMask, offsetOutput + (2 * outputComponentSize)); 566 priv::CopyPackedAlpha<isSigned, normalized, toFloat, toHalf>( 567 (packedValue >> alphaShift) & alphaMask, offsetOutput + (3 * outputComponentSize)); 568 } 569 } 570 571 template <bool isSigned, bool normalized, bool toHalf> 572 inline void CopyXYZ10ToXYZWFloatVertexData(const uint8_t *input, 573 size_t stride, 574 size_t count, 575 uint8_t *output) 576 { 577 const size_t outputComponentSize = toHalf ? 2 : 4; 578 const size_t componentCount = 4; 579 580 const uint32_t rgbMask = 0x3FF; // 1 set in bits 0 through 9 581 const size_t redShift = 22; // red is bits 22 through 31 582 const size_t greenShift = 12; // green is bits 12 through 21 583 const size_t blueShift = 2; // blue is bits 2 through 11 584 585 const uint32_t alphaDefaultValueBits = normalized ? (isSigned ? 0x1 : 0x3) : 0x1; 586 587 for (size_t i = 0; i < count; i++) 588 { 589 GLuint packedValue = *reinterpret_cast<const GLuint *>(input + (i * stride)); 590 uint8_t *offsetOutput = output + (i * outputComponentSize * componentCount); 591 592 priv::CopyPackedRGB<isSigned, normalized, true, toHalf>( 593 (packedValue >> redShift) & rgbMask, offsetOutput + (0 * outputComponentSize)); 594 priv::CopyPackedRGB<isSigned, normalized, true, toHalf>( 595 (packedValue >> greenShift) & rgbMask, offsetOutput + (1 * outputComponentSize)); 596 priv::CopyPackedRGB<isSigned, normalized, true, toHalf>( 597 (packedValue >> blueShift) & rgbMask, offsetOutput + (2 * outputComponentSize)); 598 priv::CopyPackedAlpha<isSigned, normalized, true, toHalf>( 599 alphaDefaultValueBits, offsetOutput + (3 * outputComponentSize)); 600 } 601 } 602 603 template <bool isSigned, bool normalized, bool toHalf> 604 inline void CopyW2XYZ10ToXYZWFloatVertexData(const uint8_t *input, 605 size_t stride, 606 size_t count, 607 uint8_t *output) 608 { 609 const size_t outputComponentSize = toHalf ? 2 : 4; 610 const size_t componentCount = 4; 611 612 const uint32_t rgbMask = 0x3FF; // 1 set in bits 0 through 9 613 const size_t redShift = 22; // red is bits 22 through 31 614 const size_t greenShift = 12; // green is bits 12 through 21 615 const size_t blueShift = 2; // blue is bits 2 through 11 616 617 const uint32_t alphaMask = 0x3; // 1 set in bits 0 and 1 618 const size_t alphaShift = 0; // Alpha is the 30 and 31 bits 619 620 for (size_t i = 0; i < count; i++) 621 { 622 GLuint packedValue = *reinterpret_cast<const GLuint *>(input + (i * stride)); 623 uint8_t *offsetOutput = output + (i * outputComponentSize * componentCount); 624 625 priv::CopyPackedRGB<isSigned, normalized, true, toHalf>( 626 (packedValue >> redShift) & rgbMask, offsetOutput + (0 * outputComponentSize)); 627 priv::CopyPackedRGB<isSigned, normalized, true, toHalf>( 628 (packedValue >> greenShift) & rgbMask, offsetOutput + (1 * outputComponentSize)); 629 priv::CopyPackedRGB<isSigned, normalized, true, toHalf>( 630 (packedValue >> blueShift) & rgbMask, offsetOutput + (2 * outputComponentSize)); 631 priv::CopyPackedAlpha<isSigned, normalized, true, toHalf>( 632 (packedValue >> alphaShift) & alphaMask, offsetOutput + (3 * outputComponentSize)); 633 } 634 } 635 } // namespace rx