yuv_convert_sse2.cpp (1673B)
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <emmintrin.h> 6 #include "yuv_row.h" 7 8 namespace mozilla { 9 namespace gfx { 10 11 // FilterRows combines two rows of the image using linear interpolation. 12 // SSE2 version does 16 pixels at a time. 13 void FilterRows_SSE2(uint8_t* ybuf, const uint8_t* y0_ptr, const uint8_t* y1_ptr, 14 int source_width, int source_y_fraction) { 15 __m128i zero = _mm_setzero_si128(); 16 __m128i y1_fraction = _mm_set1_epi16(source_y_fraction); 17 __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction); 18 19 const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr); 20 const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr); 21 __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf); 22 __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width); 23 24 do { 25 __m128i y0 = _mm_loadu_si128(y0_ptr128); 26 __m128i y1 = _mm_loadu_si128(y1_ptr128); 27 __m128i y2 = _mm_unpackhi_epi8(y0, zero); 28 __m128i y3 = _mm_unpackhi_epi8(y1, zero); 29 y0 = _mm_unpacklo_epi8(y0, zero); 30 y1 = _mm_unpacklo_epi8(y1, zero); 31 y0 = _mm_mullo_epi16(y0, y0_fraction); 32 y1 = _mm_mullo_epi16(y1, y1_fraction); 33 y2 = _mm_mullo_epi16(y2, y0_fraction); 34 y3 = _mm_mullo_epi16(y3, y1_fraction); 35 y0 = _mm_add_epi16(y0, y1); 36 y2 = _mm_add_epi16(y2, y3); 37 y0 = _mm_srli_epi16(y0, 8); 38 y2 = _mm_srli_epi16(y2, 8); 39 y0 = _mm_packus_epi16(y0, y2); 40 *dest128++ = y0; 41 ++y0_ptr128; 42 ++y1_ptr128; 43 } while (dest128 < end128); 44 } 45 46 } // namespace gfx 47 } // namespace mozilla