openMSX
LineScalers.hh
Go to the documentation of this file.
1#ifndef LINESCALERS_HH
2#define LINESCALERS_HH
3
4#include "PixelOperations.hh"
5#include "ranges.hh"
6#include "view.hh"
7#include "xrange.hh"
8#include <cassert>
9#include <cstddef>
10#include <cstdint>
11#include <span>
12#ifdef __SSE2__
13#include "emmintrin.h"
14#endif
15
16namespace openmsx {
17
18using Pixel = uint32_t;
19
26void scale_1on3(std::span<const Pixel> in, std::span<Pixel> out);
27void scale_1on4(std::span<const Pixel> in, std::span<Pixel> out);
28void scale_1on6(std::span<const Pixel> in, std::span<Pixel> out);
29void Scale_1on2(std::span<const Pixel> in, std::span<Pixel> out);
30void scale_2on1(std::span<const Pixel> in, std::span<Pixel> out);
31void scale_6on1(std::span<const Pixel> in, std::span<Pixel> out);
32void scale_4on1(std::span<const Pixel> in, std::span<Pixel> out);
33void scale_3on1(std::span<const Pixel> in, std::span<Pixel> out);
34void scale_3on2(std::span<const Pixel> in, std::span<Pixel> out);
35void scale_3on4(std::span<const Pixel> in, std::span<Pixel> out);
36void scale_3on8(std::span<const Pixel> in, std::span<Pixel> out);
37void scale_2on3(std::span<const Pixel> in, std::span<Pixel> out);
38void scale_4on3(std::span<const Pixel> in, std::span<Pixel> out);
39void scale_8on3(std::span<const Pixel> in, std::span<Pixel> out);
40void scale_2on9(std::span<const Pixel> in, std::span<Pixel> out);
41void scale_4on9(std::span<const Pixel> in, std::span<Pixel> out);
42void scale_8on9(std::span<const Pixel> in, std::span<Pixel> out);
43void scale_4on5(std::span<const Pixel> in, std::span<Pixel> out);
44void scale_7on8(std::span<const Pixel> in, std::span<Pixel> out);
45void scale_9on10(std::span<const Pixel> in, std::span<Pixel> out);
46void scale_17on20(std::span<const Pixel> in, std::span<Pixel> out);
47
55template<unsigned w1 = 1, unsigned w2 = 1>
56void blendLines(std::span<const Pixel> in1, std::span<const Pixel> in2,
57 std::span<Pixel> out);
58
66void alphaBlendLines(std::span<const Pixel> in1, std::span<const Pixel> in2,
67 std::span<Pixel> out);
68void alphaBlendLines(Pixel in1, std::span<const Pixel> in2,
69 std::span<Pixel> out);
70
71
72// implementation
73
74template<unsigned N>
75static inline void scale_1onN(
76 std::span<const Pixel> in, std::span<Pixel> out)
77{
78 auto outWidth = out.size();
79 assert(in.size() == (outWidth / N));
80
81 size_t i = 0, j = 0;
82 for (/* */; i < (outWidth - (N - 1)); i += N, j += 1) {
83 Pixel pix = in[j];
84 for (auto k : xrange(N)) {
85 out[i + k] = pix;
86 }
87 }
88 for (auto k : xrange(N - 1)) {
89 if ((i + k) < outWidth) out[i + k] = 0;
90 }
91}
92
93inline void scale_1on3(std::span<const Pixel> in, std::span<Pixel> out)
94{
95 scale_1onN<3>(in, out);
96}
97
98inline void scale_1on4(std::span<const Pixel> in, std::span<Pixel> out)
99{
100 scale_1onN<4>(in, out);
101}
102
103inline void scale_1on6(std::span<const Pixel> in, std::span<Pixel> out)
104{
105 scale_1onN<6>(in, out);
106}
107
108#ifdef __SSE2__
/** Interleave the two lowest 32-bit elements of 'x' and 'y'.
 * Result (low to high): x0, y0, x1, y1. */
inline __m128i unpacklo(__m128i x, __m128i y)
{
	// Pixels are 32 bits wide (32bpp), so interleave per 32-bit element.
	const __m128i interleaved = _mm_unpacklo_epi32(x, y);
	return interleaved;
}
/** Interleave the two highest 32-bit elements of 'x' and 'y'.
 * Result (low to high): x2, y2, x3, y3. */
inline __m128i unpackhi(__m128i x, __m128i y)
{
	// Pixels are 32 bits wide (32bpp), so interleave per 32-bit element.
	const __m128i interleaved = _mm_unpackhi_epi32(x, y);
	return interleaved;
}
119
120inline void scale_1on2_SSE(const Pixel* __restrict in_, Pixel* __restrict out_, size_t srcWidth)
121{
122 size_t bytes = srcWidth * sizeof(Pixel);
123 assert((bytes % (4 * sizeof(__m128i))) == 0);
124 assert(bytes != 0);
125
126 const auto* in = reinterpret_cast<const char*>(in_) + bytes;
127 auto* out = reinterpret_cast< char*>(out_) + 2 * bytes;
128
129 auto x = -ptrdiff_t(bytes);
130 do {
131 __m128i a0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + x + 0));
132 __m128i a1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + x + 16));
133 __m128i a2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + x + 32));
134 __m128i a3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + x + 48));
135 __m128i l0 = unpacklo(a0, a0);
136 __m128i h0 = unpackhi(a0, a0);
137 __m128i l1 = unpacklo(a1, a1);
138 __m128i h1 = unpackhi(a1, a1);
139 __m128i l2 = unpacklo(a2, a2);
140 __m128i h2 = unpackhi(a2, a2);
141 __m128i l3 = unpacklo(a3, a3);
142 __m128i h3 = unpackhi(a3, a3);
143 _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 0), l0);
144 _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 16), h0);
145 _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 32), l1);
146 _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 48), h1);
147 _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 64), l2);
148 _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 80), h2);
149 _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 96), l3);
150 _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 112), h3);
151 x += 4 * sizeof(__m128i);
152 } while (x < 0);
153}
154#endif
155
156inline void scale_1on2(std::span<const Pixel> in, std::span<Pixel> out)
157{
158 // This is a fairly simple algorithm (output each input pixel twice).
159 // An ideal compiler should generate optimal (vector) code for it.
160 // I checked the 2013-05-29 dev snapshots of gcc-4.9 and clang-3.4:
161 // - Clang is not able to vectorize this loop. My best tuned C version
162 // of this routine is a little over 4x slower than the tuned
163 // SSE-intrinsics version.
164 // - Gcc can auto-vectorize this routine. Though my best tuned version
165 // (I mean tuned to further improve the auto-vectorization, including
166 // using the new __builtin_assume_aligned() intrinsic) still runs
167 // approx 40% slower than the intrinsics version.
168 // Hopefully in some years the compilers have improved further so that
169 // the intrinsic version is no longer needed.
170 auto srcWidth = in.size();
171 assert(out.size() == 2 * srcWidth);
172
173#ifdef __SSE2__
174 size_t chunk = 4 * sizeof(__m128i) / sizeof(Pixel);
175 size_t srcWidth2 = srcWidth & ~(chunk - 1);
176 scale_1on2_SSE(in.data(), out.data(), srcWidth2);
177 in = in .subspan( srcWidth2);
178 out = out.subspan(2 * srcWidth2);
179 srcWidth -= srcWidth2;
180#endif
181
182 // C++ version. Used both on non-x86 machines and (possibly) on x86 for
183 // the last few pixels of the line.
184 for (auto x : xrange(srcWidth)) {
185 out[x * 2] = out[x * 2 + 1] = in[x];
186 }
187}
188
189#ifdef __SSE2__
/** Integer-vector wrapper around _mm_shuffle_ps.
 * The two lowest 32-bit result elements are picked from 'x', the two
 * highest from 'y'; each 2-bit field of IMM8 selects the source element. */
template<int IMM8> static inline __m128i shuffle(__m128i x, __m128i y)
{
	// The casts only reinterpret the bits, the values are not converted.
	const auto xAsFloat = _mm_castsi128_ps(x);
	const auto yAsFloat = _mm_castsi128_ps(y);
	const auto shuffled = _mm_shuffle_ps(xAsFloat, yAsFloat, IMM8);
	return _mm_castps_si128(shuffled);
}
195
196inline __m128i blend(__m128i x, __m128i y)
197{
198 // 32bpp
199 __m128i p = shuffle<0x88>(x, y);
200 __m128i q = shuffle<0xDD>(x, y);
201 return _mm_avg_epu8(p, q);
202}
203
204inline void scale_2on1_SSE(
205 const Pixel* __restrict in_, Pixel* __restrict out_, size_t dstBytes)
206{
207 assert((dstBytes % (4 * sizeof(__m128i))) == 0);
208 assert(dstBytes != 0);
209
210 const auto* in = reinterpret_cast<const char*>(in_) + 2 * dstBytes;
211 auto* out = reinterpret_cast< char*>(out_) + dstBytes;
212
213 auto x = -ptrdiff_t(dstBytes);
214 do {
215 __m128i a0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + 2*x + 0));
216 __m128i a1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + 2*x + 16));
217 __m128i a2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + 2*x + 32));
218 __m128i a3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + 2*x + 48));
219 __m128i a4 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + 2*x + 64));
220 __m128i a5 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + 2*x + 80));
221 __m128i a6 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + 2*x + 96));
222 __m128i a7 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + 2*x + 112));
223 __m128i b0 = blend(a0, a1);
224 __m128i b1 = blend(a2, a3);
225 __m128i b2 = blend(a4, a5);
226 __m128i b3 = blend(a6, a7);
227 _mm_storeu_si128(reinterpret_cast<__m128i*>(out + x + 0), b0);
228 _mm_storeu_si128(reinterpret_cast<__m128i*>(out + x + 16), b1);
229 _mm_storeu_si128(reinterpret_cast<__m128i*>(out + x + 32), b2);
230 _mm_storeu_si128(reinterpret_cast<__m128i*>(out + x + 48), b3);
231 x += 4 * sizeof(__m128i);
232 } while (x < 0);
233}
234#endif
235
236inline void scale_2on1(std::span<const Pixel> in, std::span<Pixel> out)
237{
238 assert(in.size() == 2 * out.size());
239 auto outWidth = out.size();
240#ifdef __SSE2__
241 auto n64 = (outWidth * sizeof(Pixel)) & ~63;
242 scale_2on1_SSE(in.data(), out.data(), n64); // process 64 byte chunks
243 outWidth &= ((64 / sizeof(Pixel)) - 1); // remaining pixels (if any)
244 if (outWidth == 0) [[likely]] return;
245 in = in .subspan(2 * n64 / sizeof(Pixel));
246 out = out.subspan( n64 / sizeof(Pixel));
247 // fallthrough to c++ version
248#endif
249 // pure C++ version
250 PixelOperations pixelOps;
251 for (auto i : xrange(outWidth)) {
252 out[i] = pixelOps.template blend<1, 1>(
253 in[2 * i + 0], in[2 * i + 1]);
254 }
255}
256
257inline void scale_6on1(std::span<const Pixel> in, std::span<Pixel> out)
258{
259 assert(in.size() == 6 * out.size());
260 PixelOperations pixelOps;
261 for (auto i : xrange(out.size())) {
262 out[i] = pixelOps.template blend<1, 1, 1, 1, 1, 1>(subspan<6>(in, 6 * i));
263 }
264}
265
266inline void scale_4on1(std::span<const Pixel> in, std::span<Pixel> out)
267{
268 assert(in.size() == 4 * out.size());
269 PixelOperations pixelOps;
270 for (auto i : xrange(out.size())) {
271 out[i] = pixelOps.template blend<1, 1, 1, 1>(subspan<4>(in, 4 * i));
272 }
273}
274
275inline void scale_3on1(std::span<const Pixel> in, std::span<Pixel> out)
276{
277 assert(in.size() == 3 * out.size());
278 PixelOperations pixelOps;
279 for (auto i : xrange(out.size())) {
280 out[i] = pixelOps.template blend<1, 1, 1>(subspan<3>(in, 3 * i));
281 }
282}
283
284inline void scale_3on2(std::span<const Pixel> in, std::span<Pixel> out)
285{
286 assert((in.size() / 3) == (out.size() / 2));
287 PixelOperations pixelOps;
288 size_t n = out.size();
289 size_t i = 0, j = 0;
290 for (/* */; i < (n - 1); i += 2, j += 3) {
291 out[i + 0] = pixelOps.template blend<2, 1>(subspan<2>(in, j + 0));
292 out[i + 1] = pixelOps.template blend<1, 2>(subspan<2>(in, j + 1));
293 }
294 if (i < n) out[i] = 0;
295}
296
297inline void scale_3on4(std::span<const Pixel> in, std::span<Pixel> out)
298{
299 assert((in.size() / 3) == (out.size() / 4));
300 PixelOperations pixelOps;
301 size_t n = out.size();
302 size_t i = 0, j = 0;
303 for (/* */; i < (n - 3); i += 4, j += 3) {
304 out[i + 0] = in[j + 0];
305 out[i + 1] = pixelOps.template blend<1, 2>(subspan<2>(in, j + 0));
306 out[i + 2] = pixelOps.template blend<2, 1>(subspan<2>(in, j + 1));
307 out[i + 3] = in[j + 2];
308 }
309 for (auto k : xrange(4 - 1)) {
310 if ((i + k) < n) out[i + k] = 0;
311 }
312}
313
314inline void scale_3on8(std::span<const Pixel> in, std::span<Pixel> out)
315{
316 assert((in.size() / 3) == (out.size() / 8));
317 PixelOperations pixelOps;
318 size_t n = out.size();
319 size_t i = 0, j = 0;
320 for (/* */; i < (n - 7); i += 8, j += 3) {
321 out[i + 0] = in[j + 0];
322 out[i + 1] = in[j + 0];
323 out[i + 2] = pixelOps.template blend<2, 1>(subspan<2>(in, j + 0));
324 out[i + 3] = in[j + 1];
325 out[i + 4] = in[j + 1];
326 out[i + 5] = pixelOps.template blend<1, 2>(subspan<2>(in, j + 1));
327 out[i + 6] = in[j + 2];
328 out[i + 7] = in[j + 2];
329 }
330 for (auto k : xrange(8 - 1)) {
331 if ((i + k) < n) out[i + k] = 0;
332 }
333}
334
335inline void scale_2on3(std::span<const Pixel> in, std::span<Pixel> out)
336{
337 assert((in.size() / 2) == (out.size() / 3));
338 PixelOperations pixelOps;
339 size_t n = out.size();
340 size_t i = 0, j = 0;
341 for (/* */; i < (n - 2); i += 3, j += 2) {
342 out[i + 0] = in[j + 0];
343 out[i + 1] = pixelOps.template blend<1, 1>(subspan<2>(in, j));
344 out[i + 2] = in[j + 1];
345 }
346 if ((i + 0) < n) out[i + 0] = 0;
347 if ((i + 1) < n) out[i + 1] = 0;
348}
349
350inline void scale_4on3(std::span<const Pixel> in, std::span<Pixel> out)
351{
352 assert((in.size() / 4) == (out.size() / 3));
353 PixelOperations pixelOps;
354 size_t n = out.size();
355 size_t i = 0, j = 0;
356 for (/* */; i < (n - 2); i += 3, j += 4) {
357 out[i + 0] = pixelOps.template blend<3, 1>(subspan<2>(in, j + 0));
358 out[i + 1] = pixelOps.template blend<1, 1>(subspan<2>(in, j + 1));
359 out[i + 2] = pixelOps.template blend<1, 3>(subspan<2>(in, j + 2));
360 }
361 if ((i + 0) < n) out[i + 0] = 0;
362 if ((i + 1) < n) out[i + 1] = 0;
363}
364
365inline void scale_8on3(std::span<const Pixel> in, std::span<Pixel> out)
366{
367 assert((in.size() / 8) == (out.size() / 3));
368 PixelOperations pixelOps;
369 size_t n = out.size();
370 size_t i = 0, j = 0;
371 for (/* */; i < (n - 2); i += 3, j += 8) {
372 out[i + 0] = pixelOps.template blend<3, 3, 2> (subspan<3>(in, j + 0));
373 out[i + 1] = pixelOps.template blend<1, 3, 3, 1>(subspan<4>(in, j + 2));
374 out[i + 2] = pixelOps.template blend<2, 3, 3> (subspan<3>(in, j + 5));
375 }
376 if ((i + 0) < n) out[i + 0] = 0;
377 if ((i + 1) < n) out[i + 1] = 0;
378}
379
380inline void scale_2on9(std::span<const Pixel> in, std::span<Pixel> out)
381{
382 assert((in.size() / 2) == (out.size() / 9));
383 PixelOperations pixelOps;
384 size_t n = out.size();
385 size_t i = 0, j = 0;
386 for (/* */; i < (n - 8); i += 9, j += 2) {
387 out[i + 0] = in[j + 0];
388 out[i + 1] = in[j + 0];
389 out[i + 2] = in[j + 0];
390 out[i + 3] = in[j + 0];
391 out[i + 4] = pixelOps.template blend<1, 1>(subspan<2>(in, j));
392 out[i + 5] = in[j + 1];
393 out[i + 6] = in[j + 1];
394 out[i + 7] = in[j + 1];
395 out[i + 8] = in[j + 1];
396 }
397 if ((i + 0) < n) out[i + 0] = 0;
398 if ((i + 1) < n) out[i + 1] = 0;
399 if ((i + 2) < n) out[i + 2] = 0;
400 if ((i + 3) < n) out[i + 3] = 0;
401 if ((i + 4) < n) out[i + 4] = 0;
402 if ((i + 5) < n) out[i + 5] = 0;
403 if ((i + 6) < n) out[i + 6] = 0;
404 if ((i + 7) < n) out[i + 7] = 0;
405}
406
407inline void scale_4on9(std::span<const Pixel> in, std::span<Pixel> out)
408{
409 assert((in.size() / 4) == (out.size() / 9));
410 PixelOperations pixelOps;
411 size_t n = out.size();
412 size_t i = 0, j = 0;
413 for (/* */; i < (n - 8); i += 9, j += 4) {
414 out[i + 0] = in[j + 0];
415 out[i + 1] = in[j + 0];
416 out[i + 2] = pixelOps.template blend<1, 3>(subspan<2>(in, j + 0));
417 out[i + 3] = in[j + 1];
418 out[i + 4] = pixelOps.template blend<1, 1>(subspan<2>(in, j + 1));
419 out[i + 5] = in[j + 2];
420 out[i + 6] = pixelOps.template blend<3, 1>(subspan<2>(in, j + 2));
421 out[i + 7] = in[j + 3];
422 out[i + 8] = in[j + 3];
423 }
424 if ((i + 0) < n) out[i + 0] = 0;
425 if ((i + 1) < n) out[i + 1] = 0;
426 if ((i + 2) < n) out[i + 2] = 0;
427 if ((i + 3) < n) out[i + 3] = 0;
428 if ((i + 4) < n) out[i + 4] = 0;
429 if ((i + 5) < n) out[i + 5] = 0;
430 if ((i + 6) < n) out[i + 6] = 0;
431 if ((i + 7) < n) out[i + 7] = 0;
432}
433
434inline void scale_8on9(std::span<const Pixel> in, std::span<Pixel> out)
435{
436 assert((in.size() / 8) == (out.size() / 9));
437 PixelOperations pixelOps;
438 size_t n = out.size();
439 size_t i = 0, j = 0;
440 for (/* */; i < (n - 8); i += 9, j += 8) {
441 out[i + 0] = in[j + 0];
442 out[i + 1] = pixelOps.template blend<1, 7>(subspan<2>(in, j + 0));
443 out[i + 2] = pixelOps.template blend<1, 3>(subspan<2>(in, j + 1));
444 out[i + 3] = pixelOps.template blend<3, 5>(subspan<2>(in, j + 2));
445 out[i + 4] = pixelOps.template blend<1, 1>(subspan<2>(in, j + 3));
446 out[i + 5] = pixelOps.template blend<5, 3>(subspan<2>(in, j + 4));
447 out[i + 6] = pixelOps.template blend<3, 1>(subspan<2>(in, j + 5));
448 out[i + 7] = pixelOps.template blend<7, 1>(subspan<2>(in, j + 6));
449 out[i + 8] = in[j + 7];
450 }
451 if ((i + 0) < n) out[i + 0] = 0;
452 if ((i + 1) < n) out[i + 1] = 0;
453 if ((i + 2) < n) out[i + 2] = 0;
454 if ((i + 3) < n) out[i + 3] = 0;
455 if ((i + 4) < n) out[i + 4] = 0;
456 if ((i + 5) < n) out[i + 5] = 0;
457 if ((i + 6) < n) out[i + 6] = 0;
458 if ((i + 7) < n) out[i + 7] = 0;
459}
460
461inline void scale_4on5(std::span<const Pixel> in, std::span<Pixel> out)
462{
463 assert((in.size() / 4) == (out.size() / 5));
464 PixelOperations pixelOps;
465 size_t n = out.size();
466 assert((n % 5) == 0);
467 for (size_t i = 0, j = 0; i < n; i += 5, j += 4) {
468 out[i + 0] = in[j + 0];
469 out[i + 1] = pixelOps.template blend<1, 3>(subspan<2>(in, j + 0));
470 out[i + 2] = pixelOps.template blend<1, 1>(subspan<2>(in, j + 1));
471 out[i + 3] = pixelOps.template blend<3, 1>(subspan<2>(in, j + 2));
472 out[i + 4] = in[j + 3];
473 }
474}
475
476inline void scale_7on8(std::span<const Pixel> in, std::span<Pixel> out)
477{
478 assert((in.size() / 7) == (out.size() / 8));
479 PixelOperations pixelOps;
480 size_t n = out.size();
481 assert((n % 8) == 0);
482 for (size_t i = 0, j = 0; i < n; i += 8, j += 7) {
483 out[i + 0] = in[j + 0];
484 out[i + 1] = pixelOps.template blend<1, 6>(subspan<2>(in, j + 0));
485 out[i + 2] = pixelOps.template blend<2, 5>(subspan<2>(in, j + 1));
486 out[i + 3] = pixelOps.template blend<3, 4>(subspan<2>(in, j + 2));
487 out[i + 4] = pixelOps.template blend<4, 3>(subspan<2>(in, j + 3));
488 out[i + 5] = pixelOps.template blend<5, 2>(subspan<2>(in, j + 4));
489 out[i + 6] = pixelOps.template blend<6, 1>(subspan<2>(in, j + 5));
490 out[i + 7] = in[j + 6];
491 }
492}
493
494inline void scale_17on20(std::span<const Pixel> in, std::span<Pixel> out)
495{
496 assert((in.size() / 17) == (out.size() / 20));
497 PixelOperations pixelOps;
498 size_t n = out.size();
499 assert((n % 20) == 0);
500 for (size_t i = 0, j = 0; i < n; i += 20, j += 17) {
501 out[i + 0] = in[j + 0];
502 out[i + 1] = pixelOps.template blend< 3, 14>(subspan<2>(in, j + 0));
503 out[i + 2] = pixelOps.template blend< 6, 11>(subspan<2>(in, j + 1));
504 out[i + 3] = pixelOps.template blend< 9, 8>(subspan<2>(in, j + 2));
505 out[i + 4] = pixelOps.template blend<12, 5>(subspan<2>(in, j + 3));
506 out[i + 5] = pixelOps.template blend<15, 2>(subspan<2>(in, j + 4));
507 out[i + 6] = in[j + 5];
508 out[i + 7] = pixelOps.template blend< 1, 16>(subspan<2>(in, j + 5));
509 out[i + 8] = pixelOps.template blend< 4, 13>(subspan<2>(in, j + 6));
510 out[i + 9] = pixelOps.template blend< 7, 10>(subspan<2>(in, j + 7));
511 out[i + 10] = pixelOps.template blend<10, 7>(subspan<2>(in, j + 8));
512 out[i + 11] = pixelOps.template blend<13, 4>(subspan<2>(in, j + 9));
513 out[i + 12] = pixelOps.template blend<16, 1>(subspan<2>(in, j + 10));
514 out[i + 13] = in[j + 11];
515 out[i + 14] = pixelOps.template blend< 2, 15>(subspan<2>(in, j + 11));
516 out[i + 15] = pixelOps.template blend< 5, 12>(subspan<2>(in, j + 12));
517 out[i + 16] = pixelOps.template blend< 8, 9>(subspan<2>(in, j + 13));
518 out[i + 17] = pixelOps.template blend<11, 6>(subspan<2>(in, j + 14));
519 out[i + 18] = pixelOps.template blend<14, 3>(subspan<2>(in, j + 15));
520 out[i + 19] = in[j + 16];
521 }
522}
523
524inline void scale_9on10(std::span<const Pixel> in, std::span<Pixel> out)
525{
526 assert((in.size() / 9) == (out.size() / 10));
527 PixelOperations pixelOps;
528 size_t n = out.size();
529 assert((n % 10) == 0);
530 for (size_t i = 0, j = 0; i < n; i += 10, j += 9) {
531 out[i + 0] = in[j + 0];
532 out[i + 1] = pixelOps.template blend<1, 8>(subspan<2>(in, j + 0));
533 out[i + 2] = pixelOps.template blend<2, 7>(subspan<2>(in, j + 1));
534 out[i + 3] = pixelOps.template blend<3, 6>(subspan<2>(in, j + 2));
535 out[i + 4] = pixelOps.template blend<4, 5>(subspan<2>(in, j + 3));
536 out[i + 5] = pixelOps.template blend<5, 4>(subspan<2>(in, j + 4));
537 out[i + 6] = pixelOps.template blend<6, 3>(subspan<2>(in, j + 5));
538 out[i + 7] = pixelOps.template blend<7, 2>(subspan<2>(in, j + 6));
539 out[i + 8] = pixelOps.template blend<8, 1>(subspan<2>(in, j + 7));
540 out[i + 9] = in[j + 8];
541 }
542}
543
544template<unsigned w1, unsigned w2>
545void blendLines(std::span<const Pixel> in1, std::span<const Pixel> in2, std::span<Pixel> out)
546{
547 // It _IS_ allowed that the output is the same as one of the inputs.
548 // TODO SSE optimizations
549 // pure C++ version
550 assert(in1.size() == in2.size());
551 assert(in1.size() == out.size());
552 PixelOperations pixelOps;
553 for (auto [i1, i2, o] : view::zip_equal(in1, in2, out)) {
554 o = pixelOps.template blend<w1, w2>(i1, i2);
555 }
556}
557
558inline void alphaBlendLines(
559 std::span<const Pixel> in1, std::span<const Pixel> in2, std::span<Pixel> out)
560{
561 // It _IS_ allowed that the output is the same as one of the inputs.
562 assert(in1.size() == in2.size());
563 assert(in1.size() == out.size());
564 PixelOperations pixelOps;
565 for (auto [i1, i2, o] : view::zip_equal(in1, in2, out)) {
566 o = pixelOps.alphaBlend(i1, i2);
567 }
568}
569
570inline void alphaBlendLines(
571 Pixel in1, std::span<const Pixel> in2, std::span<Pixel> out)
572{
573 // It _IS_ allowed that the output is the same as the input.
574
575 // ATM this routine is only called when 'in1' is not fully opaque nor
576 // fully transparent.
577 assert(in2.size() == out.size());
578
579 PixelOperations pixelOps;
580 unsigned alpha = pixelOps.alpha(in1);
581
582 // When one of the two colors is loop-invariant, using the
583 // pre-multiplied-alpha-blending equation is a tiny bit more efficient
584 // than using alphaBlend() or even lerp().
585 // for (auto i : xrange(width)) {
586 // out[i] = pixelOps.lerp(in1, in2[i], alpha);
587 // }
588 Pixel in1M = pixelOps.multiply(in1, alpha);
589 unsigned alpha2 = 256 - alpha;
590 for (auto [i2, o] : view::zip_equal(in2, out)) {
591 o = in1M + pixelOps.multiply(i2, alpha2);
592 }
593}
594
595} // namespace openmsx
596
597#endif
unsigned alpha(Pixel p) const
static Pixel multiply(Pixel p, unsigned x)
Perform a component wise multiplication of a pixel with an 8-bit fractional value: result = (pixel * ...
Pixel alphaBlend(Pixel p1, Pixel p2) const
Perform alpha blending of two pixels.
imat3 l3(ivec3(0, 2, 3), ivec3(4, 5, 6), ivec3(7, 8, 9))
This file implements 3 utility functions:
Definition Autofire.cc:9
void scale_3on1(std::span< const Pixel > in, std::span< Pixel > out)
void blendLines(std::span< const Pixel > in1, std::span< const Pixel > in2, std::span< Pixel > out)
BlendLines functor Generate an output line that is an interpolation of two input lines.
void scale_2on1(std::span< const Pixel > in, std::span< Pixel > out)
void scale_7on8(std::span< const Pixel > in, std::span< Pixel > out)
void scale_2on3(std::span< const Pixel > in, std::span< Pixel > out)
void scale_4on9(std::span< const Pixel > in, std::span< Pixel > out)
void scale_4on5(std::span< const Pixel > in, std::span< Pixel > out)
void scale_6on1(std::span< const Pixel > in, std::span< Pixel > out)
void scale_1on2(std::span< const Pixel > in, std::span< Pixel > out)
void Scale_1on2(std::span< const Pixel > in, std::span< Pixel > out)
void scale_1on3(std::span< const Pixel > in, std::span< Pixel > out)
Scale_XonY functions Transforms an input line of pixel to an output line (possibly) with a different ...
void scale_17on20(std::span< const Pixel > in, std::span< Pixel > out)
void scale_4on3(std::span< const Pixel > in, std::span< Pixel > out)
void scale_3on2(std::span< const Pixel > in, std::span< Pixel > out)
void scale_3on8(std::span< const Pixel > in, std::span< Pixel > out)
void scale_2on9(std::span< const Pixel > in, std::span< Pixel > out)
void alphaBlendLines(std::span< const Pixel > in1, std::span< const Pixel > in2, std::span< Pixel > out)
AlphaBlendLines functor Generate an output line that is a per-pixel-alpha-blend of the two input line...
void scale_9on10(std::span< const Pixel > in, std::span< Pixel > out)
void scale_1on4(std::span< const Pixel > in, std::span< Pixel > out)
CharacterConverter::Pixel Pixel
void scale_1on6(std::span< const Pixel > in, std::span< Pixel > out)
void scale_4on1(std::span< const Pixel > in, std::span< Pixel > out)
void scale_3on4(std::span< const Pixel > in, std::span< Pixel > out)
void scale_8on9(std::span< const Pixel > in, std::span< Pixel > out)
void scale_8on3(std::span< const Pixel > in, std::span< Pixel > out)
auto zip_equal(Ranges &&... ranges)
Definition view.hh:559
constexpr auto xrange(T e)
Definition xrange.hh:132