// Clears `tab` byte-wise to zero. The header of the enclosing function is not
// part of this excerpt.
// NOTE(review): sizeof(tab) covers the whole table only if `tab` is declared
// as an array rather than a pointer -- confirm at the declaration.
19 memset(tab, 0,
sizeof(tab));
// Fills tab[p] for every p in 0..0xFFFF. For each of the R, G and B masks
// reported by pixelOps the channel bits are isolated, multiplied by f,
// shifted right by 8 and masked again, so only bits that still belong to
// that channel survive; the three channel results are then OR-ed together.
// `f` is defined on a line outside this excerpt; the >> 8 suggests it is a
// scale factor in units of 1/256 -- TODO confirm.
// The closing brace of the loop is not visible in this excerpt either.
29 for (
unsigned p = 0; p < 0x10000; ++p) {
30 tab[p] = ((((p & pixelOps.getRmask()) * f) >> 8) & pixelOps.getRmask()) |
31 ((((p & pixelOps.getGmask()) * f) >> 8) & pixelOps.getGmask()) |
32 ((((p & pixelOps.getBmask()) * f) >> 8) & pixelOps.getBmask());
// Scales the R, G and B channels of p independently: mask out the channel,
// multiply by f, shift right by 8, then mask again to drop any bits that
// spilled outside the channel.
// How r, g and b are combined and returned is not visible in this excerpt.
38 unsigned r = (((p & pixelOps.getRmask()) * f) >> 8) & pixelOps.getRmask();
39 unsigned g = (((p & pixelOps.getGmask()) * f) >> 8) & pixelOps.getGmask();
40 unsigned b = (((p & pixelOps.getBmask()) * f) >> 8) & pixelOps.getBmask();
// Forwards to the two-argument multiply(), passing `factor`. `factor` is
// declared outside this excerpt (presumably a stored member -- confirm).
73 return multiply(p, factor);
// Processes one 16-byte (128-bit) chunk: averages the bytes of in1 and in2,
// scales the result by f and writes 16 bytes to out.
//   in1, in2  source chunks; read through a __m128i pointer, so they are
//             expected to be 16-byte aligned
//   out       destination chunk; written through a __m128i pointer
//   f         eight 16-bit multipliers, applied via _mm_mulhi_epu16
// The opening and closing braces of the function are not visible in this
// excerpt.
85 static inline void drawSSE2_1(
86 const char* __restrict in1,
const char* __restrict in2,
87 char* __restrict out, __m128i f)
89 __m128i zero = _mm_setzero_si128();
90 __m128i a = *
reinterpret_cast<const __m128i*
>(in1);
91 __m128i b = *
reinterpret_cast<const __m128i*
>(in2);
// Rounded per-byte unsigned average of the two inputs.
92 __m128i c = _mm_avg_epu8(a, b);
// Interleaving with zero widens the low / high eight bytes to 16-bit lanes.
93 __m128i l = _mm_unpacklo_epi8(c, zero);
94 __m128i h = _mm_unpackhi_epi8(c, zero);
// _mm_mulhi_epu16 keeps the upper 16 bits of each unsigned 16x16 product,
// i.e. (lane * f) >> 16. With an f built as (factor << 8), as the 32-bit
// drawSSE2 caller does, this amounts to (lane * factor) >> 8.
95 __m128i m = _mm_mulhi_epu16(l, f);
96 __m128i n = _mm_mulhi_epu16(h, f);
// Narrow back to 16 bytes (unsigned saturation). Each lane is at most 255
// before the multiply, so the results already fit in a byte.
97 __m128i r = _mm_packus_epi16(m, n);
98 *
reinterpret_cast<__m128i*
>(out) = r;
// 32-bit-pixel overload: feeds the two input rows through drawSSE2_1() in
// groups of four 16-byte chunks (64 bytes per group).
// The PixelOperations and Multiply parameters are unnamed and therefore
// unused here. The declarations of `factor` and `width` (between out_ and
// the PixelOperations parameter), the function braces and any loop construct
// around the four drawSSE2_1() calls are on lines not visible in this excerpt.
100 static inline void drawSSE2(
101 const uint32_t* __restrict in1_,
102 const uint32_t* __restrict in2_,
103 uint32_t* __restrict out_,
106 PixelOperations<uint32_t>& ,
107 Multiply<uint32_t>& )
// From here on `width` is a byte count rather than a pixel count.
109 width *=
sizeof(uint32_t);
// All three buffers must start on a sizeof(__m128i)-byte boundary.
// NOTE(review): reinterpret_cast<long> on a pointer is not portable to
// targets where long is narrower than a pointer (e.g. 64-bit Windows);
// uintptr_t would be the portable choice.
111 assert((reinterpret_cast<long>(in1_) %
sizeof(__m128i)) == 0);
112 assert((reinterpret_cast<long>(in2_) %
sizeof(__m128i)) == 0);
113 assert((reinterpret_cast<long>(out_) %
sizeof(__m128i)) == 0);
// The byte pointers are placed one past the end of each row; together with
// the negative offset x below, `ptr + x` starts at the first byte of the row.
114 auto* in1 =
reinterpret_cast<const char*
>(in1_) + width;
115 auto* in2 =
reinterpret_cast<const char*
>(in2_) + width;
116 auto* out =
reinterpret_cast< char*
>(out_) + width;
// Broadcast (factor << 8) to all eight 16-bit lanes; drawSSE2_1() multiplies
// with _mm_mulhi_epu16 (>> 16), giving an effective (value * factor) >> 8.
// Assumes factor << 8 fits in 16 bits -- TODO confirm the range of factor.
118 __m128i f = _mm_set1_epi16(factor << 8);
119 long x = -long(width);
// Four consecutive 16-byte chunks at byte offsets x, x+16, x+32 and x+48.
121 drawSSE2_1(in1 + x + 0, in2 + x + 0, out + x + 0, f);
122 drawSSE2_1(in1 + x + 16, in2 + x + 16, out + x + 16, f);
123 drawSSE2_1(in1 + x + 32, in2 + x + 32, out + x + 32, f);
124 drawSSE2_1(in1 + x + 48, in2 + x + 48, out + x + 48, f);
// 16-bit-pixel overload: combines eight pixels of in1 and in2 into the
// vector c and maps every 16-bit lane of c through the darkener's lookup
// table before storing the eight results to out.
// The declarations of `factor` and `width`, the function braces, any loop
// construct and part of the expression that computes c are on lines not
// visible in this excerpt. No alignment asserts are visible in this overload,
// although the buffers are accessed through __m128i pointers.
130 static inline void drawSSE2(
131 const uint16_t* __restrict in1_,
132 const uint16_t* __restrict in2_,
133 uint16_t* __restrict out_,
136 PixelOperations<uint16_t>& pixelOps,
137 Multiply<uint16_t>& darkener)
// From here on `width` is a byte count rather than a pixel count.
139 width *=
sizeof(uint16_t);
// The byte pointers are placed one past the end of each row; together with
// the negative offset x below, `ptr + x` starts at the first byte of the row.
141 auto* in1 =
reinterpret_cast<const char*
>(in1_) + width;
142 auto* in2 =
reinterpret_cast<const char*
>(in2_) + width;
143 auto* out =
reinterpret_cast< char*
>(out_) + width;
// Configure the darkener for this factor, then fetch its table; every lane
// of c is used as an index into that table below.
145 darkener.setFactor(factor);
146 const uint16_t* table = darkener.getTable();
// The blend mask from pixelOps, replicated into all eight 16-bit lanes.
147 __m128i mask = _mm_set1_epi16(pixelOps.getBlendMask());
149 long x = -long(width);
151 __m128i a = *
reinterpret_cast<const __m128i*
>(in1 + x);
152 __m128i b = *
reinterpret_cast<const __m128i*
>(in2 + x);
// c is a 16-bit lane-wise sum. Only one operand is fully visible in this
// excerpt: (a ^ b) & mask. The remaining operand(s) are on missing lines.
153 __m128i c = _mm_add_epi16(
156 _mm_and_si128(mask, _mm_xor_si128(a, b)),
// _mm_set_epi16 takes its arguments from the highest lane (7) down to the
// lowest (0), so output lane i receives table[lane i of c].
158 *
reinterpret_cast<__m128i*
>(out + x) = _mm_set_epi16(
159 table[_mm_extract_epi16(c, 7)],
160 table[_mm_extract_epi16(c, 6)],
161 table[_mm_extract_epi16(c, 5)],
162 table[_mm_extract_epi16(c, 4)],
163 table[_mm_extract_epi16(c, 3)],
164 table[_mm_extract_epi16(c, 2)],
165 table[_mm_extract_epi16(c, 1)],
166 table[_mm_extract_epi16(c, 0)]);
// Member-initializer list of a constructor template over Pixel: both
// `darkener` and `pixelOps` are initialized from the same `pixelOps_`
// argument. The constructor's name, parameter list and body are on lines not
// visible in this excerpt.
192 template <
class Pixel>
194 : darkener(pixelOps_)
195 , pixelOps(pixelOps_)
// Writes `width` pixels to dst, each derived from the pixels at the same
// index in src1 and src2 and darkened according to `factor`.
// Two implementations are visible: a call to drawSSE2() and a scalar loop.
// The lines that choose between them (presumably a preprocessor conditional
// -- confirm), the function's name line and its braces are not visible in
// this excerpt.
199 template <
class Pixel>
201 const Pixel* __restrict src1,
const Pixel* __restrict src2,
202 Pixel* __restrict dst,
unsigned factor,
unsigned long width)
// SSE2 path: hands the whole row to the drawSSE2() overload for this Pixel.
205 drawSSE2(src1, src2, dst, factor, width, pixelOps, darkener);
// Scalar path: set the factor once, then for each pixel blend the two
// sources with blend<1, 1> (presumably equal weights -- confirm in
// PixelOperations) and pass the result through darkener.multiply().
208 darkener.setFactor(factor);
209 for (
unsigned x = 0; x < width; ++x) {
210 dst[x] = darkener.multiply(
211 pixelOps.template blend<1, 1>(src1[x], src2[x]));
// Returns darkener.multiply(p, factor) for a single pixel p. The function's
// name and parameter list are on lines not visible in this excerpt.
216 template <
class Pixel>
219 return darkener.multiply(p, factor);
// Blends p1 and p2 with pixelOps blend<1, 1> (presumably equal weights --
// confirm in PixelOperations) and returns darkener.multiply() of the blended
// pixel with `factor`. The function's name and parameter list are on lines
// not visible in this excerpt.
222 template <
class Pixel>
225 return darkener.multiply(pixelOps.template blend<1, 1>(p1, p2), factor);