openMSX
Simple3xScaler.cc
Go to the documentation of this file.
1 #include "Simple3xScaler.hh"
3 #include "LineScalers.hh"
4 #include "RawFrame.hh"
5 #include "ScalerOutput.hh"
6 #include "RenderSettings.hh"
7 #include "Multiply32.hh"
8 #include "vla.hh"
9 #include "memory.hh"
10 #include <cstdint>
11 #ifdef __SSE2__
12 #include <emmintrin.h>
13 #endif
14 
15 namespace openmsx {
16 
// Line scaler that triples the width of a line while blending neighbouring
// source pixels. It is invoked as a functor: operator() reads dstWidth / 3
// input pixels and writes dstWidth output pixels.
17 template <class Pixel> class Blur_1on3
18 {
19 public:
20  Blur_1on3(const PixelOperations<Pixel>& pixelOps);
    // Selects the blend strength used by the next operator() call.
    // The constructor in this file only initialises mult0..mult3, so 'blur'
    // has no defined value until setBlur() has been called.
21  inline void setBlur(unsigned blur_) { blur = blur_; }
    // Scales one line: 'in' holds dstWidth / 3 pixels, 'out' receives dstWidth.
22  void operator()(const Pixel* in, Pixel* out, size_t dstWidth);
23 private:
    // Per-weight multipliers; operator() loads them with the factors
    // c0..c3 (derived from 'blur') on every call.
24  Multiply32<Pixel> mult0;
25  Multiply32<Pixel> mult1;
26  Multiply32<Pixel> mult2;
27  Multiply32<Pixel> mult3;
    // Current blend strength, written by setBlur().
28  unsigned blur;
29 #ifdef __SSE2__
    // SSE2 implementation, used by operator() for 4-byte pixels only.
30  void blur_SSE(const Pixel* in_, Pixel* out_, size_t srcWidth);
31 #endif
32 };
33 
34 
35 template <class Pixel>
37  const PixelOperations<Pixel>& pixelOps_,
38  const RenderSettings& settings_)
39  : Scaler3<Pixel>(pixelOps_)
40  , pixelOps(pixelOps_)
41  , scanline(pixelOps_)
42  , blur_1on3(make_unique<Blur_1on3<Pixel>>(pixelOps_))
43  , settings(settings_)
44 {
45 }
46 
47 template <class Pixel>
49 {
50 }
51 
// Vertical 1 -> 3 scaling: every source line yields three output lines, namely
// two copies of the horizontally scaled line followed by one line produced by
// scanline.draw() from the current and the next scaled line.
// The srcEndY argument is unused; iteration is bounded by dstEndY only.
// NOTE(review): the declarator line and the final parameter line are missing
// from this listing; the call sites in this file suggest this is
// doScale1(..., scale) with 'scale' being the horizontal line scaler - confirm.
52 template <typename Pixel>
54  unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
55  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY,
57 {
    // Scratch line handed to getLinePtr(); presumably SSE-aligned storage
    // for srcWidth pixels - see vla.hh.
58  VLA_SSE_ALIGNED(Pixel, buf, srcWidth);
59  int scanlineFactor = settings.getScanlineFactor();
60  unsigned dstWidth = dst.getWidth();
61  unsigned y = dstStartY;
    // Prime the pipeline: scale the first source line into output line y+0
    // and duplicate it into y+1.
62  auto* srcLine = src.getLinePtr(srcStartY++, srcWidth, buf);
63  auto* dstLine0 = dst.acquireLine(y + 0);
64  scale(srcLine, dstLine0, dstWidth);
65 
66  Scale_1on1<Pixel> copy;
67  auto* dstLine1 = dst.acquireLine(y + 1);
68  copy(dstLine0, dstLine1, dstWidth);
69 
    // Each iteration scales the next source line into y+3 / y+4, which makes
    // it possible to compute the in-between line y+2 of the current group.
    // The loop only runs while y+4 is still below dstEndY.
70  for (/* */; (y + 4) < dstEndY; y += 3, srcStartY += 1) {
71  srcLine = src.getLinePtr(srcStartY, srcWidth, buf);
72  auto* dstLine3 = dst.acquireLine(y + 3);
73  scale(srcLine, dstLine3, dstWidth);
74 
75  auto* dstLine4 = dst.acquireLine(y + 4);
76  copy(dstLine3, dstLine4, dstWidth);
77 
78  auto* dstLine2 = dst.acquireLine(y + 2);
79  scanline.draw(dstLine0, dstLine3, dstLine2,
80  scanlineFactor, dstWidth);
81 
82  dst.releaseLine(y + 0, dstLine0);
83  dst.releaseLine(y + 1, dstLine1);
84  dst.releaseLine(y + 2, dstLine2);
    // The lines just written for the next group become the current group.
85  dstLine0 = dstLine3;
86  dstLine1 = dstLine4;
87  }
    // Last group: the following source line is still fetched and scaled, but
    // only into a temporary buffer; it is needed as second input for the
    // final y+2 line and is not itself stored in 'dst'.
88  srcLine = src.getLinePtr(srcStartY, srcWidth, buf);
89  VLA_SSE_ALIGNED(Pixel, buf2, dstWidth);
90  scale(srcLine, buf2, dstWidth);
91 
92  auto* dstLine2 = dst.acquireLine(y + 2);
93  scanline.draw(dstLine0, buf2, dstLine2, scanlineFactor, dstWidth);
94  dst.releaseLine(y + 0, dstLine0);
95  dst.releaseLine(y + 1, dstLine1);
96  dst.releaseLine(y + 2, dstLine2);
97 }
98 
// Vertical 2 -> 3 scaling: every pair of source lines (srcY, srcY + 1) yields
// three output lines. dstY+0 is the scaled first line, dstY+2 the scaled
// second line and dstY+1 is produced by scanline.draw() from those two.
// 'scale' performs the horizontal scaling of a single line.
// The srcEndY argument is unused; iteration is bounded by dstEndY only.
99 template <typename Pixel>
100 void Simple3xScaler<Pixel>::doScale2(FrameSource& src,
101  unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
102  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY,
103  PolyLineScaler<Pixel>& scale)
104 {
     // Scratch line handed to getLinePtr(); presumably SSE-aligned storage
     // for srcWidth pixels - see vla.hh.
105  VLA_SSE_ALIGNED(Pixel, buf, srcWidth);
106  int scanlineFactor = settings.getScanlineFactor();
107  unsigned dstWidth = dst.getWidth();
108  for (unsigned srcY = srcStartY, dstY = dstStartY; dstY < dstEndY;
109  srcY += 2, dstY += 3) {
     // 'buf' is shared by both getLinePtr() calls, so the first source line
     // is scaled before the second one is fetched (assumes getLinePtr() may
     // place the returned pixels in 'buf' - TODO confirm).
110  auto* srcLine0 = src.getLinePtr(srcY + 0, srcWidth, buf);
111  auto* dstLine0 = dst.acquireLine(dstY + 0);
112  scale(srcLine0, dstLine0, dstWidth);
113 
114  auto* srcLine1 = src.getLinePtr(srcY + 1, srcWidth, buf);
115  auto* dstLine2 = dst.acquireLine(dstY + 2);
116  scale(srcLine1, dstLine2, dstWidth);
117 
     // The middle line is derived from the two already scaled outer lines.
118  auto* dstLine1 = dst.acquireLine(dstY + 1);
119  scanline.draw(dstLine0, dstLine2, dstLine1,
120  scanlineFactor, dstWidth);
121 
122  dst.releaseLine(dstY + 0, dstLine0);
123  dst.releaseLine(dstY + 1, dstLine1);
124  dst.releaseLine(dstY + 2, dstLine2);
125  }
126 }
127 
128 template <class Pixel>
130  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
131  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
132 {
134  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
135 }
136 
137 template <class Pixel>
139  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
140  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
141 {
143  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
144 }
145 
146 template <class Pixel>
148  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
149  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
150 {
151  if (unsigned blur = settings.getBlurFactor() / 3) {
152  blur_1on3->setBlur(blur);
153  PolyScaleRef<Pixel, Blur_1on3<Pixel>> op(*blur_1on3);
154  doScale1(src, srcStartY, srcEndY, srcWidth,
155  dst, dstStartY, dstEndY, op);
156  } else {
157  // No blurring: this is an optimization but it's also needed
158  // for correctness (otherwise there's an overflow in 0.16 fixed
159  // point arithmetic).
161  doScale1(src, srcStartY, srcEndY, srcWidth,
162  dst, dstStartY, dstEndY, op);
163  }
164 }
165 
166 template <class Pixel>
168  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
169  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
170 {
172  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
173 }
174 
175 template <class Pixel>
177  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
178  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
179 {
181  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
182 }
183 
184 template <class Pixel>
186  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
187  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
188 {
190  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
191 }
192 
193 template <class Pixel>
195  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
196  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
197 {
199  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
200 }
201 
202 template <class Pixel>
204  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
205  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
206 {
208  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
209 }
210 
211 template <class Pixel>
213  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
214  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
215 {
217  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
218 }
219 
220 template <class Pixel>
222  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
223  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
224 {
226  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
227 }
228 
229 template <class Pixel>
231  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
232  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
233 {
235  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
236 }
237 
238 template <class Pixel>
240  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
241  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
242 {
244  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
245 }
246 
// Fills output lines for single-colour ("blank") source lines, three output
// lines per source line: two in the line colour and one in the colour returned
// by scanline.darken().
// NOTE(review): the declarator line is missing from this listing; the 1:3
// stepping suggests this is Simple3xScaler<Pixel>::scaleBlank1to3 - confirm.
247 template <class Pixel>
249  FrameSource& src, unsigned srcStartY, unsigned srcEndY,
250  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
251 {
252  int scanlineFactor = settings.getScanlineFactor();
253 
254  unsigned dstHeight = dst.getHeight();
     // Unless the range ends at the bottom of the output, stop one group of
     // three lines early; that group is handled after the loop.
     // NOTE(review): 'dstEndY - 3' is unsigned arithmetic; presumably
     // dstEndY >= 3 whenever it differs from dstHeight - confirm.
255  unsigned stopDstY = (dstEndY == dstHeight)
256  ? dstEndY : dstEndY - 3;
257  unsigned srcY = srcStartY, dstY = dstStartY;
258  for (/* */; dstY < stopDstY; srcY += 1, dstY += 3) {
259  Pixel color0 = src.getLineColor<Pixel>(srcY);
260  Pixel color1 = scanline.darken(color0, scanlineFactor);
261  dst.fillLine(dstY + 0, color0);
262  dst.fillLine(dstY + 1, color0);
263  dst.fillLine(dstY + 2, color1);
264  }
     // Remaining group: the current line is blank (width 1) but the next one
     // is not, so the work is forwarded to dispatchScale() using the width of
     // the next line (presumably so the last scanline can take the following
     // non-blank line into account - confirm).
265  if (dstY != dstHeight) {
266  unsigned nextLineWidth = src.getLineWidth(srcY + 1);
267  assert(src.getLineWidth(srcY) == 1);
268  assert(nextLineWidth != 1);
269  this->dispatchScale(src, srcY, srcEndY, nextLineWidth,
270  dst, dstY, dstEndY);
271  }
272 }
273 
// Fills output lines for single-colour ("blank") source lines, three output
// lines per pair of source lines: the first line colour, the colour returned
// by the two-colour scanline.darken() overload, and the second line colour.
// The srcEndY argument is unused; iteration is bounded by dstEndY only.
// NOTE(review): the declarator line is missing from this listing; the 2:3
// stepping suggests this is Simple3xScaler<Pixel>::scaleBlank2to3 - confirm.
274 template <class Pixel>
276  FrameSource& src, unsigned srcStartY, unsigned /*srcEndY*/,
277  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
278 {
279  int scanlineFactor = settings.getScanlineFactor();
280  for (unsigned srcY = srcStartY, dstY = dstStartY;
281  dstY < dstEndY; srcY += 2, dstY += 3) {
282  Pixel color0 = src.getLineColor<Pixel>(srcY + 0);
283  Pixel color1 = src.getLineColor<Pixel>(srcY + 1);
     // Middle line is derived from both neighbouring colours.
284  Pixel color01 = scanline.darken(color0, color1, scanlineFactor);
285  dst.fillLine(dstY + 0, color0);
286  dst.fillLine(dstY + 1, color01);
287  dst.fillLine(dstY + 2, color1);
288  }
289 }
290 
291 
292 // class Blur_1on3
293 
294 template <class Pixel>
296  : mult0(pixelOps)
297  , mult1(pixelOps)
298  , mult2(pixelOps)
299  , mult3(pixelOps)
300 {
301 }
302 
303 #ifdef __SSE2__
// SSE2 version of the 1 -> 3 horizontal blur for 32-bit pixels.
// Reads srcWidth pixels from in_ and writes 3 * srcWidth pixels to out_,
// using the same three-tap weighting as the scalar code in operator().
// Preconditions (checked with assert only):
//  - Pixel is 4 bytes wide,
//  - srcWidth is a multiple of 4 and at least 8,
//  - in_ and out_ are 16-byte aligned.
// The weights c2 and c3 are stored in 16-bit lanes; with blur == 0 they would
// be 0x10000, which does not fit, so this routine must not run with blur == 0.
304 template<class Pixel>
305 void Blur_1on3<Pixel>::blur_SSE(const Pixel* in_, Pixel* out_, size_t srcWidth)
306 {
307  if (sizeof(Pixel) != 4) {
308  assert(false); return; // only 32-bpp
309  }
310 
311  assert((srcWidth % 4) == 0);
312  assert(srcWidth >= 8);
313  assert((size_t(in_ ) % 16) == 0);
314  assert((size_t(out_) % 16) == 0);
315 
     // Weights scaled so that _mm_mulhi_epu16 (which keeps the upper 16 bits
     // of each 16x16 product) applies them as fractions of 0x10000.
316  unsigned alpha = blur * 256;
317  unsigned c0 = alpha / 2;
318  unsigned c1 = alpha + c0;
319  unsigned c2 = 0x10000 - c1;
320  unsigned c3 = 0x10000 - alpha;
     // _mm_set_epi16 lists lanes from high to low: C0C1 has c0 in the low
     // 64 bits and c1 in the high 64 bits. Shuffle 0x4E swaps the two halves.
321  __m128i C0C1 = _mm_set_epi16(c1, c1, c1, c1, c0, c0, c0, c0);
322  __m128i C1C0 = _mm_shuffle_epi32(C0C1, 0x4E);
323  __m128i C2C3 = _mm_set_epi16(c3, c3, c3, c3, c2, c2, c2, c2);
324  __m128i C3C2 = _mm_shuffle_epi32(C2C3, 0x4E);
325 
     // 'in' and 'out' point at the last group of 4 source / 12 destination
     // pixels; 'x' is a negative byte offset that counts up towards 0.
326  size_t tmp = srcWidth - 4;
327  auto* in = reinterpret_cast<const char*>(in_ + tmp);
328  auto* out = reinterpret_cast< char*>(out_ + 3 * tmp);
329  auto x = -ptrdiff_t(tmp * sizeof(Pixel));
330 
331  __m128i ZERO = _mm_setzero_si128();
332 
333  // Prepare first iteration (duplicate left border pixel)
334  __m128i abcd = _mm_load_si128(reinterpret_cast<const __m128i*>(in + x));
335  __m128i a_b_ = _mm_unpacklo_epi8(abcd, ZERO);
336  __m128i a_a_ = _mm_unpacklo_epi64(a_b_, a_b_);
337  __m128i a0a1 = _mm_mulhi_epu16(a_a_, C0C1);
338  __m128i d1d0 = _mm_shuffle_epi32(a0a1, 0x4E); // left border
339 
340  // At the start of each iteration the following vars are live:
341  // abcd, a_b_, a_a_, a0a1, d1d0
342  // Each iteration reads 4 and produces 12 pixels.
     // The body runs at least once and, near its end, already loads the next
     // group of four pixels (in + x + 16); hence the srcWidth >= 8 requirement.
     // NOTE(review): shuffle<0xE4>() is a helper not defined in this listing;
     // presumably it merges halves of its two operands - confirm.
343  do {
344  // p01
345  __m128i a2a3 = _mm_mulhi_epu16(a_a_, C2C3);
346  __m128i b_b_ = _mm_unpackhi_epi64(a_b_, a_b_);
347  __m128i b1b0 = _mm_mulhi_epu16(b_b_, C1C0);
348  __m128i xxb0 = _mm_unpackhi_epi64(ZERO, b1b0);
349  __m128i p01 = _mm_add_epi16(_mm_add_epi16(d1d0, a2a3), xxb0);
350  // p23
351  __m128i xxa1 = _mm_unpackhi_epi64(ZERO, a0a1);
352  __m128i b3b2 = _mm_mulhi_epu16(b_b_, C3C2);
353  __m128i a2b2 = shuffle<0xE4>(a2a3, b3b2);
354  __m128i b1xx = _mm_unpacklo_epi64(b1b0, ZERO);
355  __m128i p23 = _mm_add_epi16(_mm_add_epi16(xxa1, a2b2), b1xx);
356  __m128i p0123 = _mm_packus_epi16(p01, p23);
357  _mm_store_si128(reinterpret_cast<__m128i*>(out + 3 * x + 0),
358  p0123);
359 
360  // p45
361  __m128i a0xx = _mm_unpacklo_epi64(a0a1, ZERO);
362  __m128i c_d_ = _mm_unpackhi_epi8(abcd, ZERO);
363  __m128i c_c_ = _mm_unpacklo_epi64(c_d_, c_d_);
364  __m128i c0c1 = _mm_mulhi_epu16(c_c_, C0C1);
365  __m128i p45 = _mm_add_epi16(_mm_add_epi16(a0xx, b3b2), c0c1);
366  // p67
367  __m128i c2c3 = _mm_mulhi_epu16(c_c_, C2C3);
368  __m128i d_d_ = _mm_unpackhi_epi64(c_d_, c_d_);
369  d1d0 = _mm_mulhi_epu16(d_d_, C1C0);
370  __m128i xxd0 = _mm_unpackhi_epi64(ZERO, d1d0);
371  __m128i p67 = _mm_add_epi16(_mm_add_epi16(b1b0, c2c3), xxd0);
372  __m128i p4567 = _mm_packus_epi16(p45, p67);
373  _mm_store_si128(reinterpret_cast<__m128i*>(out + 3 * x + 16),
374  p4567);
375 
376  // p89
377  __m128i xxc1 = _mm_unpackhi_epi64(ZERO, c0c1);
378  __m128i d3d2 = _mm_mulhi_epu16(d_d_, C3C2);
379  __m128i c2d2 = shuffle<0xE4>(c2c3, d3d2);
380  __m128i d1xx = _mm_unpacklo_epi64(d1d0, ZERO);
381  __m128i p89 = _mm_add_epi16(_mm_add_epi16(xxc1, c2d2), d1xx);
382  // pab
383  __m128i c0xx = _mm_unpacklo_epi64(c0c1, ZERO);
384  abcd = _mm_load_si128(reinterpret_cast<const __m128i*>(in + x + 16));
385  a_b_ = _mm_unpacklo_epi8(abcd, ZERO);
386  a_a_ = _mm_unpacklo_epi64(a_b_, a_b_);
387  a0a1 = _mm_mulhi_epu16(a_a_, C0C1);
388  __m128i pab = _mm_add_epi16(_mm_add_epi16(c0xx, d3d2), a0a1);
389  __m128i p89ab = _mm_packus_epi16(p89, pab);
390  _mm_store_si128(reinterpret_cast<__m128i*>(out + 3 * x + 32),
391  p89ab);
392 
393  x += 16;
394  } while (x < 0);
395 
     // Same computation as the loop body for the final four pixels (x == 0),
     // except that no further input is loaded: the right neighbour of the
     // last pixel is the last pixel itself.
396  // Last iteration (duplicate right border pixel)
397  // p01
398  __m128i a2a3 = _mm_mulhi_epu16(a_a_, C2C3);
399  __m128i b_b_ = _mm_unpackhi_epi64(a_b_, a_b_);
400  __m128i b1b0 = _mm_mulhi_epu16(b_b_, C1C0);
401  __m128i xxb0 = _mm_unpackhi_epi64(ZERO, b1b0);
402  __m128i p01 = _mm_add_epi16(_mm_add_epi16(d1d0, a2a3), xxb0);
403  // p23
404  __m128i xxa1 = _mm_unpackhi_epi64(ZERO, a0a1);
405  __m128i b3b2 = _mm_mulhi_epu16(b_b_, C3C2);
406  __m128i a2b2 = shuffle<0xE4>(a2a3, b3b2);
407  __m128i b1xx = _mm_unpacklo_epi64(b1b0, ZERO);
408  __m128i p23 = _mm_add_epi16(_mm_add_epi16(xxa1, a2b2), b1xx);
409  __m128i p0123 = _mm_packus_epi16(p01, p23);
410  _mm_store_si128(reinterpret_cast<__m128i*>(out + 0),
411  p0123);
412 
413  // p45
414  __m128i a0xx = _mm_unpacklo_epi64(a0a1, ZERO);
415  __m128i c_d_ = _mm_unpackhi_epi8(abcd, ZERO);
416  __m128i c_c_ = _mm_unpacklo_epi64(c_d_, c_d_);
417  __m128i c0c1 = _mm_mulhi_epu16(c_c_, C0C1);
418  __m128i p45 = _mm_add_epi16(_mm_add_epi16(a0xx, b3b2), c0c1);
419  // p67
420  __m128i c2c3 = _mm_mulhi_epu16(c_c_, C2C3);
421  __m128i d_d_ = _mm_unpackhi_epi64(c_d_, c_d_);
422  d1d0 = _mm_mulhi_epu16(d_d_, C1C0);
423  __m128i xxd0 = _mm_unpackhi_epi64(ZERO, d1d0);
424  __m128i p67 = _mm_add_epi16(_mm_add_epi16(b1b0, c2c3), xxd0);
425  __m128i p4567 = _mm_packus_epi16(p45, p67);
426  _mm_store_si128(reinterpret_cast<__m128i*>(out + 16),
427  p4567);
428 
429  // p89
430  __m128i xxc1 = _mm_unpackhi_epi64(ZERO, c0c1);
431  __m128i d3d2 = _mm_mulhi_epu16(d_d_, C3C2);
432  __m128i c2d2 = shuffle<0xE4>(c2c3, d3d2);
433  __m128i d1xx = _mm_unpacklo_epi64(d1d0, ZERO);
434  __m128i p89 = _mm_add_epi16(_mm_add_epi16(xxc1, c2d2), d1xx);
435  // pab
436  __m128i c0xx = _mm_unpacklo_epi64(c0c1, ZERO);
437  a0a1 = _mm_shuffle_epi32(d1d0, 0x4E); // right border
438  __m128i pab = _mm_add_epi16(_mm_add_epi16(c0xx, d3d2), a0a1);
439  __m128i p89ab = _mm_packus_epi16(p89, pab);
440  _mm_store_si128(reinterpret_cast<__m128i*>(out + 32),
441  p89ab);
442 }
443 #endif
444 
// Scales one line horizontally by a factor 3 while blending each pixel with
// its left and right neighbour. 'in' holds dstWidth / 3 pixels, 'out' receives
// dstWidth pixels. The border pixels use themselves as missing neighbour.
// The scalar code below processes two source pixels per step and handles the
// last two outside the loop, so it requires srcWidth to be even and >= 2
// (the loop bound 'srcWidth - 2' is unsigned and would wrap otherwise).
// NOTE(review): the declarator line is missing from this listing; the class
// declaration suggests this is Blur_1on3<Pixel>::operator() - confirm.
445 template <class Pixel>
447  const Pixel* __restrict in, Pixel* __restrict out,
448  size_t dstWidth)
449 {
450  /* The following code is equivalent to this loop. It is 2x unrolled
451  * and common subexpressions have been eliminated. The last iteration
452  * is also moved outside the for loop.
453  *
454  * unsigned c0 = blur / 2;
455  * unsigned c1 = c0 + blur;
456  * unsigned c2 = 256 - c1;
457  * unsigned c3 = 256 - 2 * c0;
458  * Pixel prev, curr, next;
459  * prev = curr = next = in[0];
460  * size_t srcWidth = dstWidth / 3;
461  * for (unsigned x = 0; x < srcWidth; ++x) {
462  * if (x != (srcWidth - 1)) next = in[x + 1];
463  * out[3 * x + 0] = mul(c1, prev) + mul(c2, curr);
464  * out[3 * x + 1] = mul(c0, prev) + mul(c3, curr) + mul(c0, next);
465  * out[3 * x + 2] = mul(c2, curr) + mul(c1, next);
466  * prev = curr;
467  * curr = next;
468  * }
469  */
470  size_t srcWidth = dstWidth / 3;
471 #ifdef __SSE2__
     // 32-bit pixels take the SSE2 path; everything else falls through.
472  if (sizeof(Pixel) == 4) {
473  blur_SSE(in, out, srcWidth);
474  return;
475  }
476 #endif
477 
478  // C++ routine, both 16bpp and 32bpp
     // Weights out of 256; each output pixel's weights sum to 256.
479  unsigned c0 = blur / 2;
480  unsigned c1 = blur + c0;
481  unsigned c2 = 256 - c1;
482  unsigned c3 = 256 - 2 * c0;
483  mult0.setFactor32(c0);
484  mult1.setFactor32(c1);
485  mult2.setFactor32(c2);
486  mult3.setFactor32(c3);
487 
     // f0/f1 and g0/g1 carry the c0- and c1-weighted value of the previous
     // pixel for the first and second half of the unrolled step. Initially
     // the previous pixel is in[0] itself (left border).
488  Pixel p0 = in[0];
489  Pixel p1;
490  uint32_t f0 = mult0.mul32(p0);
491  uint32_t f1 = mult1.mul32(p0);
492  uint32_t g0 = f0;
493  uint32_t g1 = f1;
494 
     // All sums are converted back with mult0.conv32(); presumably conv32()
     // does not depend on the multiplier's factor - confirm in Multiply32.hh.
495  size_t x;
496  for (x = 0; x < (srcWidth - 2); x += 2) {
497  uint32_t g2 = mult2.mul32(p0);
498  out[3 * x + 0] = mult0.conv32(g2 + f1);
499  p1 = in[x + 1];
500  uint32_t t0 = mult0.mul32(p1);
501  out[3 * x + 1] = mult0.conv32(f0 + mult3.mul32(p0) + t0);
502  f0 = t0;
503  f1 = mult1.mul32(p1);
504  out[3 * x + 2] = mult0.conv32(g2 + f1);
505 
506  uint32_t f2 = mult2.mul32(p1);
507  out[3 * x + 3] = mult0.conv32(f2 + g1);
508  p0 = in[x + 2];
509  uint32_t t1 = mult0.mul32(p0);
510  out[3 * x + 4] = mult0.conv32(g0 + mult3.mul32(p1) + t1);
511  g0 = t1;
512  g1 = mult1.mul32(p0);
513  out[3 * x + 5] = mult0.conv32(g1 + f2);
514  }
     // Final two source pixels. The last pixel has no right neighbour, so it
     // stands in for it: the c0 term reuses f0 and the last output pixel is
     // the unmodified source pixel.
515  uint32_t g2 = mult2.mul32(p0);
516  out[3 * x + 0] = mult0.conv32(g2 + f1);
517  p1 = in[x + 1];
518  uint32_t t0 = mult0.mul32(p1);
519  out[3 * x + 1] = mult0.conv32(f0 + mult3.mul32(p0) + t0);
520  f0 = t0;
521  f1 = mult1.mul32(p1);
522  out[3 * x + 2] = mult0.conv32(g2 + f1);
523 
524  uint32_t f2 = mult2.mul32(p1);
525  out[3 * x + 3] = mult0.conv32(f2 + g1);
526  out[3 * x + 4] = mult0.conv32(g0 + mult3.mul32(p1) + f0);
527  out[3 * x + 5] = p1;
528 }
529 
// Entry point for scaling an area of the frame. When a super-impose frame is
// given, the source is first wrapped in a SuperImposedVideoFrame and the line
// width is re-read from that wrapper; otherwise the source is used directly.
// In both cases the work is forwarded to dispatchScale().
// NOTE(review): the declarator line is missing from this listing; the
// parameter list suggests this is Simple3xScaler<Pixel>::scaleImage - confirm.
530 template <class Pixel>
532  FrameSource& src, const RawFrame* superImpose,
533  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
534  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
535 {
536  if (superImpose) {
537  SuperImposedVideoFrame<Pixel> sf(src, *superImpose, pixelOps);
     // The combined frame may have a different width than 'src' alone,
     // so the caller-supplied srcWidth is replaced.
538  srcWidth = sf.getLineWidth(srcStartY);
539  this->dispatchScale(sf, srcStartY, srcEndY, srcWidth,
540  dst, dstStartY, dstEndY);
541  } else {
542  this->dispatchScale(src, srcStartY, srcEndY, srcWidth,
543  dst, dstStartY, dstEndY);
544  }
545 }
546 
547 // Force template instantiation.
548 #if HAVE_16BPP
549 template class Simple3xScaler<uint16_t>;
550 #endif
551 #if HAVE_32BPP
552 template class Simple3xScaler<uint32_t>;
553 #endif
554 
555 } // namespace openmsx
This class represents a frame that is the (per-pixel) alpha-blend of a (laser-disc) video frame and a...
Helper class to perform 'pixel x scalar' calculations.
Definition: Multiply32.hh:14
void setBlur(unsigned blur_)
const Pixel getLineColor(unsigned line) const
Get the (single) color of the given line.
Definition: FrameSource.hh:78
virtual void fillLine(unsigned y, Pixel color)=0
Simple3xScaler(const PixelOperations< Pixel > &pixelOps, const RenderSettings &renderSettings)
virtual Pixel * acquireLine(unsigned y)=0
virtual void scale2x2to9x3(FrameSource &src, unsigned srcstarty, unsigned srcendy, unsigned srcwidth, ScalerOutput< Pixel > &dst, unsigned dststarty, unsigned dstendy)
Polymorphic wrapper around another line scaler.
Definition: LineScalers.hh:310
Polymorphic line scaler.
Definition: LineScalers.hh:282
virtual void scale4x2to9x3(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
virtual unsigned getLineWidth(unsigned line) const
Gets the number of display pixels on the given line.
virtual void scale2x1to3x3(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
Interface for getting lines from a video frame.
Definition: FrameSource.hh:15
mat4 scale(const vec3 &xyz)
Definition: gl_transform.hh:19
virtual void scale4x2to3x3(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
virtual unsigned getHeight() const =0
A video frame as output by the VDP scanline conversion unit, before any postprocessing filters are ap...
Definition: RawFrame.hh:13
unsigned Pixel
virtual void scale1x2to3x3(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
Blur_1on3(const PixelOperations< Pixel > &pixelOps)
virtual void releaseLine(unsigned y, Pixel *buf)=0
virtual void scaleImage(FrameSource &src, const RawFrame *superImpose, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
Scales the image in the given area, which must consist of lines which are all equally wide...
virtual void scaleBlank2to3(FrameSource &src, unsigned srcStartY, unsigned srcEndY, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
virtual void scale2x1to9x3(FrameSource &src, unsigned srcstarty, unsigned srcendy, unsigned srcwidth, ScalerOutput< Pixel > &dst, unsigned dststarty, unsigned dstendy)
const Pixel * getLinePtr(int line, unsigned width, Pixel *buf) const
Gets a pointer to the pixels of the given line number.
Definition: FrameSource.hh:95
Like PolyScale above, but instead keeps a reference to the actual scaler.
Definition: LineScalers.hh:337
virtual void scale1x1to3x3(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
virtual void scale4x1to3x3(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
virtual void scale2x2to3x3(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
virtual void scale8x2to9x3(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
Class containing all settings for renderers.
virtual void scale8x1to9x3(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
void operator()(const Pixel *in, Pixel *out, size_t dstWidth)
virtual void scaleBlank1to3(FrameSource &src, unsigned srcStartY, unsigned srcEndY, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
virtual unsigned getLineWidth(unsigned line) const =0
Gets the number of display pixels on the given line.
std::unique_ptr< T > make_unique()
Definition: memory.hh:27
virtual unsigned getWidth() const =0
virtual void scale4x1to9x3(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
#define VLA_SSE_ALIGNED(TYPE, NAME, LENGTH)
Definition: vla.hh:44
Base class for 3x scalers.
Definition: Scaler3.hh:11