21 #include "emmintrin.h"
23 #include "tmmintrin.h"
// Extract an unaligned 16-byte word from two adjacent 16-byte words.
//
// Viewing 'low' followed by 'high' as one 32-byte sequence, the result holds
// bytes [BYTES, BYTES + 16) of it: the upper (16 - BYTES) bytes of 'low'
// followed by the lower BYTES bytes of 'high'.
//
// When the compiler targets SSSE3 this is the single _mm_alignr_epi8()
// instruction; otherwise the same value is assembled from two SSE2 byte
// shifts. The shifted halves never overlap, so OR-ing them is exact.
template<int BYTES> static inline __m128i align(__m128i high, __m128i low)
{
	static_assert(BYTES >= 0 && BYTES <= int(sizeof(__m128i)),
	              "BYTES must lie within one 16-byte word");
#ifdef __SSSE3__
	return _mm_alignr_epi8(high, low, BYTES);
#else
	return _mm_or_si128(
		_mm_slli_si128(high, int(sizeof(__m128i)) - BYTES),
		_mm_srli_si128(low, BYTES));
#endif
}
// Bitwise multiplexer: every result bit is taken from 'a1' where the
// corresponding 'mask' bit is set and from 'a0' where it is clear.
static inline __m128i select(__m128i a0, __m128i a1, __m128i mask)
{
	// ((a0 ^ a1) & mask) ^ a0  ==  (a0 & ~mask) | (a1 & mask)
	// Where mask is 0 the middle term vanishes and a0 remains; where mask
	// is 1 the a0 bits cancel out and a1 remains.
	const __m128i diff = _mm_xor_si128(a0, a1);
	return _mm_xor_si128(_mm_and_si128(diff, mask), a0);
}
// Lane-wise equality compare at Pixel granularity.
//
// Each Pixel-sized lane of the result is all-ones when the corresponding
// lanes of 'x' and 'y' are equal and all-zeros otherwise. Only 16-bit and
// 32-bit pixels are supported; any other size is rejected at compile time
// instead of falling off the end of the function.
template<typename Pixel> static inline __m128i isEqual(__m128i x, __m128i y)
{
	static_assert(sizeof(Pixel) == 4 || sizeof(Pixel) == 2,
	              "isEqual: only 16bpp and 32bpp pixels are supported");
	if (sizeof(Pixel) == 4) {
		return _mm_cmpeq_epi32(x, y);
	}
	return _mm_cmpeq_epi16(x, y);
}
// Interleave the lower halves of 'x' and 'y' at Pixel granularity:
// the result is x[0], y[0], x[1], y[1], ...
// Only 16-bit and 32-bit pixels are supported; any other size is rejected at
// compile time instead of falling off the end of the function.
template<typename Pixel> static inline __m128i unpacklo(__m128i x, __m128i y)
{
	static_assert(sizeof(Pixel) == 4 || sizeof(Pixel) == 2,
	              "unpacklo: only 16bpp and 32bpp pixels are supported");
	if (sizeof(Pixel) == 4) {
		return _mm_unpacklo_epi32(x, y);
	}
	return _mm_unpacklo_epi16(x, y);
}
// Interleave the upper halves of 'x' and 'y' at Pixel granularity:
// the result is x[h], y[h], x[h+1], y[h+1], ... where h is half the number
// of pixels in a 16-byte word.
// Only 16-bit and 32-bit pixels are supported; any other size is rejected at
// compile time instead of falling off the end of the function.
template<typename Pixel> static inline __m128i unpackhi(__m128i x, __m128i y)
{
	static_assert(sizeof(Pixel) == 4 || sizeof(Pixel) == 2,
	              "unpackhi: only 16bpp and 32bpp pixels are supported");
	if (sizeof(Pixel) == 4) {
		return _mm_unpackhi_epi32(x, y);
	}
	return _mm_unpackhi_epi16(x, y);
}
94 template<
typename Pixel,
bool DOUBLE_X>
static inline void scale1(
95 __m128i top, __m128i bottom,
96 __m128i prev, __m128i mid, __m128i next,
97 __m128i* out0, __m128i* out1)
99 __m128i left = align<
sizeof(__m128i) -
sizeof(
Pixel)>(mid, prev);
100 __m128i right = align< sizeof(Pixel)>(next, mid);
102 __m128i teqb = isEqual<Pixel>(top, bottom);
103 __m128i leqt = isEqual<Pixel>(left, top);
104 __m128i reqt = isEqual<Pixel>(right, top);
105 __m128i leqb = isEqual<Pixel>(left, bottom);
106 __m128i reqb = isEqual<Pixel>(right, bottom);
108 __m128i cnda = _mm_andnot_si128(_mm_or_si128(teqb, reqt), leqt);
109 __m128i cndb = _mm_andnot_si128(_mm_or_si128(teqb, leqt), reqt);
110 __m128i cndc = _mm_andnot_si128(_mm_or_si128(teqb, reqb), leqb);
111 __m128i cndd = _mm_andnot_si128(_mm_or_si128(teqb, leqb), reqb);
113 __m128i a = select(mid, top, cnda);
114 __m128i b = select(mid, top, cndb);
115 __m128i c = select(mid, bottom, cndc);
116 __m128i d = select(mid, bottom, cndd);
119 out0[0] = unpacklo<Pixel>(a, b);
120 out0[1] = unpackhi<Pixel>(a, b);
121 out1[0] = unpacklo<Pixel>(c, d);
122 out1[1] = unpackhi<Pixel>(c, d);
// SSE kernel that scales one source line into two output lines.
//
// Three input lines are read 16 bytes (one __m128i) at a time: 'top' comes
// from in0_, 'bottom' from in2_, and the line being scaled from in1_. Every
// unit is passed to scale1(), which stores into out0_ and out1_.
// DOUBLE_X selects the horizontal factor: output byte offsets are SCALE
// (2 or 1) times the input byte offsets.
//
// NOTE(review): 'width' and 'prev' are used below but are not declared in the
// lines visible here, and no loop construct is visible around the central
// section. Presumably the full file has a trailing 'width' parameter, a
// 'prev = mid; mid = next;' rotation before each scale1() call and a loop
// that runs while x is negative -- confirm against the complete source.
131 template<
bool DOUBLE_X,
typename Pixel>
static inline void scaleSSE(
132 Pixel* __restrict out0_,
133 Pixel* __restrict out1_,
134 const Pixel* __restrict in0_,
135 const Pixel* __restrict in1_,
136 const Pixel* __restrict in2_,
// All five line pointers must be aligned to sizeof(__m128i) bytes; they are
// dereferenced directly as __m128i below.
// NOTE(review): reinterpret_cast<long> is ill-formed where 'long' is narrower
// than a pointer (e.g. 64-bit Windows); std::uintptr_t is the portable type.
140 assert((reinterpret_cast<long>(in0_ ) %
sizeof(__m128i)) == 0);
141 assert((reinterpret_cast<long>(in1_ ) %
sizeof(__m128i)) == 0);
142 assert((reinterpret_cast<long>(in2_ ) %
sizeof(__m128i)) == 0);
143 assert((reinterpret_cast<long>(out0_) %
sizeof(__m128i)) == 0);
144 assert((reinterpret_cast<long>(out1_) %
sizeof(__m128i)) == 0);
// From here on 'width' is measured in bytes rather than pixels.
147 width *=
sizeof(
Pixel);
// The byte width must be a whole number of __m128i units.
148 assert((width %
sizeof(__m128i)) == 0);
// Leave out the final unit: the central code loads the unit that follows the
// current one, which does not exist for the last unit, so that one is
// handled separately at the end.
150 width -=
sizeof(__m128i);
// SHIFT: byte count that moves the first pixel of a unit into the last pixel
// slot (left shift) or the last pixel into the first slot (right shift).
152 static const int SHIFT =
sizeof(__m128i) -
sizeof(
Pixel);
// SCALE: number of output bytes produced per input byte.
153 static const unsigned long SCALE = DOUBLE_X ? 2 : 1;
// Byte pointers positioned at the last unit of each line; 'x' is a negative
// byte offset relative to them that starts at -width and advances one unit
// at a time.
158 auto* in0 =
reinterpret_cast<const char*
>(in0_ ) + width;
159 auto* in1 =
reinterpret_cast<const char*
>(in1_ ) + width;
160 auto* in2 =
reinterpret_cast<const char*
>(in2_ ) + width;
161 auto* out0 =
reinterpret_cast< char*
>(out0_) + SCALE * width;
162 auto* out1 =
reinterpret_cast< char*
>(out1_) + SCALE * width;
163 long x = -long(width);
// Set-up for the first unit: 'next' is the first unit of the middle line and
// 'mid' is a stand-in whose last pixel slot holds the line's first pixel
// (presumably it becomes 'prev' of the first unit, so the left edge pixel
// sees itself as its left neighbour -- confirm).
166 __m128i next = *
reinterpret_cast<const __m128i*
>(in1 + x);
167 __m128i mid = _mm_slli_si128(next, SHIFT);
// Central units: load top/bottom at offset x and the following unit of the
// middle line, scale, then advance by one unit.
171 __m128i top = *
reinterpret_cast<const __m128i*
>(in0 + x);
172 __m128i bottom = *
reinterpret_cast<const __m128i*
>(in2 + x);
175 next = *
reinterpret_cast<const __m128i*
>(in1 + x +
sizeof(__m128i));
176 scale1<Pixel, DOUBLE_X>(top, bottom, prev, mid, next,
177 reinterpret_cast<__m128i*
>(out0 + SCALE * x),
178 reinterpret_cast<__m128i*>(out1 + SCALE * x));
179 x +=
sizeof(__m128i);
// Last unit: the pointers already address it (offset 0). There is no
// following unit, so 'next' is replaced by the previously loaded unit
// shifted right, which places that unit's last pixel in the first slot
// (presumably so the right edge pixel sees itself as its right neighbour --
// confirm).
184 __m128i top = *
reinterpret_cast<const __m128i*
>(in0);
185 __m128i bottom = *
reinterpret_cast<const __m128i*
>(in2);
188 next = _mm_srli_si128(next, SHIFT);
189 scale1<Pixel, DOUBLE_X>(top, bottom, prev, mid, next,
190 reinterpret_cast<__m128i*
>(out0),
191 reinterpret_cast<__m128i*>(out1));
197 template <
class Pixel>
// Scale one source line (src1, with neighbouring lines src0 and src2) into
// two output lines dst0 and dst1, doubling the width.
//
// NOTE(review): the line carrying the function name is not visible here;
// judging by the 1on1 sibling and the call site this is presumably
// Scale2xScaler<Pixel>::scaleLine_1on2 -- confirm.
// NOTE(review): both an SSE call and two scalar calls appear below with no
// visible conditional between them; presumably a preprocessor switch selects
// one of the two paths -- confirm against the complete source.
203 template <
class Pixel>
205 Pixel* __restrict dst0,
Pixel* __restrict dst1,
206 const Pixel* __restrict src0,
const Pixel* __restrict src1,
207 const Pixel* __restrict src2,
unsigned long srcWidth) __restrict
// Vector path: scaleSSE asserts that every pointer is aligned to
// sizeof(__m128i) and that srcWidth * sizeof(Pixel) is a multiple of it.
215 scaleSSE<true>(dst0, dst1, src0, src1, src2, srcWidth);
// Scalar path: the second output line reuses the same routine with the
// neighbouring lines swapped.
217 scaleLineHalf_1on2(dst0, src0, src1, src2, srcWidth);
218 scaleLineHalf_1on2(dst1, src2, src1, src0, srcWidth);
// Scalar routine producing one width-doubled output line: source pixel x
// yields dst[2 * x] and dst[2 * x + 1]. src1 supplies the pixel being scaled
// and its horizontal neighbours; src0 is compared against as the vertical
// neighbour that may replace the pixel, src2 as the opposite vertical
// neighbour.
//
// NOTE(review): the declarations of 'mid', 'left', 'top' and 'bot', the
// statement writing dst[0] and the per-iteration updates are not visible
// in these lines -- confirm against the complete source.
// NOTE(review): 'srcWidth - 1' is unsigned arithmetic, so srcWidth == 0 wraps
// around; the loop counter is 'unsigned' while srcWidth is 'unsigned long'.
222 template <
class Pixel>
223 void Scale2xScaler<Pixel>::scaleLineHalf_1on2(
224 Pixel* __restrict dst,
const Pixel* __restrict src0,
225 const Pixel* __restrict src1,
const Pixel* __restrict src2,
226 unsigned long srcWidth) __restrict
// First pixel: handled outside the loop.
237 Pixel right = src1[1];
239 dst[1] = (right == src0[0] && src2[0] != src0[0]) ? src0[0] : mid;
// Interior pixels: the output takes 'top' when exactly the matching
// horizontal neighbour equals 'top' and 'bot' differs from 'top'; otherwise
// it keeps 'mid'.
242 for (
unsigned x = 1; x < srcWidth - 1; ++x) {
248 dst[2 * x + 0] = (left == top && right != top && bot != top) ? top : mid;
249 dst[2 * x + 1] = (right == top && left != top && bot != top) ? top : mid;
// Last pixel: handled outside the loop.
// NOTE(review): here 'mid' and 'right' presumably hold the second-to-last and
// last source pixel after the loop's rotation -- confirm.
253 dst[2 * srcWidth - 2] =
254 (mid == src0[srcWidth - 1] && src2[srcWidth - 1] != src0[srcWidth - 1])
255 ? src0[srcWidth - 1] : right;
// NOTE(review): the right-hand side of this assignment is not visible here.
256 dst[2 * srcWidth - 1] =
// Scale one source line (src1, with neighbouring lines src0 and src2) into
// two output lines dst0 and dst1 without changing the width.
//
// NOTE(review): both an SSE call and two scalar calls appear below with no
// visible conditional between them; presumably a preprocessor switch selects
// one of the two paths -- confirm against the complete source.
260 template <
class Pixel>
261 inline void Scale2xScaler<Pixel>::scaleLine_1on1(
262 Pixel* __restrict dst0,
Pixel* __restrict dst1,
263 const Pixel* __restrict src0,
const Pixel* __restrict src1,
264 const Pixel* __restrict src2,
unsigned long srcWidth) __restrict
// Vector path: scaleSSE asserts that every pointer is aligned to
// sizeof(__m128i) and that srcWidth * sizeof(Pixel) is a multiple of it.
267 scaleSSE<false>(dst0, dst1, src0, src1, src2, srcWidth);
// Scalar path: the second output line reuses the same routine with the
// neighbouring lines swapped.
269 scaleLineHalf_1on1(dst0, src0, src1, src2, srcWidth);
270 scaleLineHalf_1on1(dst1, src2, src1, src0, srcWidth);
// Scalar routine producing one output line of unchanged width: source pixel
// x yields dst[x]. src1 supplies the pixel being scaled and its horizontal
// neighbours; src0 is compared against as the vertical neighbour that may
// replace the pixel, src2 as the opposite vertical neighbour.
//
// NOTE(review): the declarations of 'mid', 'left', 'top' and 'bot', the
// handling of the first pixel and the target of the final assignment are not
// visible in these lines -- confirm against the complete source.
// NOTE(review): 'srcWidth - 1' is unsigned arithmetic, so srcWidth == 0 wraps
// around; the loop counter is 'unsigned' while srcWidth is 'unsigned long'.
274 template <
class Pixel>
275 void Scale2xScaler<Pixel>::scaleLineHalf_1on1(
276 Pixel* __restrict dst,
const Pixel* __restrict src0,
277 const Pixel* __restrict src1,
const Pixel* __restrict src2,
278 unsigned long srcWidth) __restrict
286 Pixel right = src1[1];
// Interior pixels: take 'top' when the left neighbour equals it while the
// right neighbour and 'bot' both differ from it; otherwise keep 'mid'.
290 for (
unsigned x = 1; x < srcWidth - 1; ++x) {
296 dst[x] = (left == top && right != top && bot != top) ? top : mid;
// Last pixel: handled outside the loop (assignment target not visible here).
301 (mid == src0[srcWidth - 1] && src2[srcWidth - 1] != src0[srcWidth - 1])
302 ? src0[srcWidth - 1] : right;
// Frame-level driver: walks the destination rows two at a time while
// advancing one source row per step, and calls scaleLine_1on2 for each step
// with an upper/lower destination line and the previous/current/next source
// lines.
//
// NOTE(review): the line carrying the function name, most of the parameter
// list, the code that obtains the line pointers (dstUpper, dstLower, srcPrev,
// srcCurr, srcNext) and the end of the scaleLine_1on2 call are not visible
// here -- confirm against the complete source.
305 template <
class Pixel>
307 unsigned srcStartY,
unsigned ,
unsigned srcWidth,
// Source row counter; starts at the first source row and is incremented
// once per loop step.
310 int srcY = srcStartY;
// Each step consumes one source row and produces two destination rows.
313 for (
unsigned dstY = dstStartY; dstY < dstEndY; srcY += 1, dstY += 2) {
317 scaleLine_1on2(dstUpper, dstLower,
318 srcPrev, srcCurr, srcNext,
327 template <
class Pixel>
329 unsigned srcStartY,
unsigned ,
unsigned srcWidth,
332 int srcY = srcStartY;
335 for (
unsigned dstY = dstStartY; dstY < dstEndY; srcY += 1, dstY += 2) {
339 scaleLine_1on1(dstUpper, dstLower,
340 srcPrev, srcCurr, srcNext,