openMSX
SaI3xScaler.cc
Go to the documentation of this file.
// 2xSaI is Copyright (c) 1999-2001 by Derek Liauw Kie Fa.
// http://elektron.its.tudelft.nl/~dalikifa/
// 2xSaI is free under GPL.
//
// Modified for use in openMSX by Maarten ter Huurne.
6 
#include "SaI3xScaler.hh"
#include "FrameSource.hh"
#include "ScalerOutput.hh"
#include "openmsx.hh"
#include "build-info.hh"
#include <algorithm>
#include <cassert>
13 
14 namespace openmsx {
15 
// Constructor definition: forwards pixelOps_ to the Scaler3<Pixel> base class
// and initializes the pixelOps member from the same argument.
// NOTE(review): the declarator line (listing line 17) is missing from this
// extract, so the block does not parse as shown. It is presumably the
// SaI3xScaler<Pixel> constructor taking a single `pixelOps_` parameter --
// confirm the exact parameter type against SaI3xScaler.hh.
16 template <typename Pixel>
18  : Scaler3<Pixel>(pixelOps_)
19  , pixelOps(pixelOps_)
20 {
21 }
22 
// Fills three destination lines per source line with that source line's
// first pixel, then hands any remaining lines to dispatchScale().
// NOTE(review): the declarator line (listing line 24: return type, class
// qualifier and function name) is missing from this extract; the parameter
// list below is its continuation. Confirm the name against SaI3xScaler.hh.
23 template <class Pixel>
25  FrameSource& src, unsigned srcStartY, unsigned srcEndY,
26  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
27 {
28  unsigned dstHeight = dst.getHeight();
// Unless the requested range ends exactly at the bottom of the output, stop
// three destination lines (one source line) early; that last source line is
// processed by dispatchScale() below instead of being flat-filled.
29  unsigned stopDstY = (dstEndY == dstHeight)
30  ? dstEndY : dstEndY - 3;
31  unsigned srcY = srcStartY, dstY = dstStartY;
// One source line advances three destination lines.
32  for (/* */; dstY < stopDstY; srcY += 1, dstY += 3) {
// Only the first pixel of the source line is read; the whole destination
// line is filled with it.
33  Pixel color = src.getLinePtr<Pixel>(srcY)[0];
34  for (int i = 0; i < 3; ++i) {
35  dst.fillLine(dstY + i, color);
36  }
37  }
// If the loop stopped before the bottom of the output, the current source
// line is expected to have width 1 while the following line has a different
// width; the rest is scaled at the following line's width.
38  if (dstY != dstHeight) {
39  unsigned nextLineWidth = src.getLineWidth(srcY + 1);
40  assert(src.getLineWidth(srcY) == 1);
41  assert(nextLineWidth != 1);
42  this->dispatchScale(src, srcY, srcEndY, nextLineWidth,
43  dst, dstY, dstEndY);
44  }
45 }
46 
// Returns the blend of p1 and p2 computed by pixelOps with template weights
// <1, 1>.
// NOTE(review): the declarator line (listing line 48) is missing from this
// extract; it must declare the parameters p1 and p2 used below. Confirm the
// exact signature against SaI3xScaler.hh.
47 template <typename Pixel>
49 {
50  return pixelOps.template blend<1, 1>(p1, p2);
51 }
52 
// Channel masks for 16-bit pixels in 5-6-5 layout: the two 5-bit outer fields
// (bits 15-11 and 4-0) and the 6-bit middle field (bits 10-5). The blend
// helpers move the green field 16 bits up so all three channels can be
// multiplied by a weight in one 32-bit operation without overlapping.
static const unsigned redblueMask = 0xF81F;
static const unsigned greenMask = 0x7E0;
55 
/**
 * Linear interpolation between two pixels.
 * Only the explicit specializations (one per pixel type) are defined.
 *
 * @param a First pixel value (weight 1 - x).
 * @param b Second pixel value (weight x).
 * @param x Weight of b as a 16-bit fraction (0 .. 0xFFFF).
 * @return The blended pixel.
 */
template <typename Pixel>
static Pixel bilinear(unsigned a, unsigned b, unsigned x);
58 
59 template <>
60 word bilinear<word>(unsigned a, unsigned b, unsigned x)
61 {
62  if (a == b) return a;
63 
64  const unsigned areaB = x >> 11; // reduce 16 bit fraction to 5 bits
65  const unsigned areaA = 0x20 - areaB;
66 
67  a = (a & redblueMask) | ((a & greenMask) << 16);
68  b = (b & redblueMask) | ((b & greenMask) << 16);
69  const unsigned result = ((areaA * a) + (areaB * b)) >> 5;
70  return (result & redblueMask) | ((result >> 16) & greenMask);
71 }
72 
73 template <>
74 unsigned bilinear<unsigned>(unsigned a, unsigned b, unsigned x)
75 {
76  if (a == b) return a;
77 
78  const unsigned areaB = x >> 8; // reduce 16 bit fraction to 8 bits
79  const unsigned areaA = 0x100 - areaB;
80 
81  const unsigned result0 =
82  ((a & 0x00FF00FF) * areaA + (b & 0x00FF00FF) * areaB) >> 8;
83  const unsigned result1 =
84  ((a & 0xFF00FF00) >> 8) * areaA + ((b & 0xFF00FF00) >> 8) * areaB;
85  return (result0 & 0x00FF00FF) | (result1 & 0xFF00FF00);
86 }
87 
/**
 * Bilinear interpolation between four pixels.
 * Only the explicit specializations (one per pixel type) are defined.
 *
 * The weights are (1-x)(1-y) for a, x(1-y) for b, (1-x)y for c and xy for d.
 *
 * @param a,b,c,d The four pixel values.
 * @param x Horizontal position as a 16-bit fraction (0 .. 0xFFFF).
 * @param y Vertical position as a 16-bit fraction (0 .. 0xFFFF).
 * @return The blended pixel.
 */
template <typename Pixel>
static Pixel bilinear4(
    unsigned a, unsigned b, unsigned c, unsigned d, unsigned x, unsigned y);
91 
92 template <>
94  unsigned a, unsigned b, unsigned c, unsigned d, unsigned x, unsigned y
95 ) {
96  x >>= 11;
97  y >>= 11;
98  const unsigned xy = (x * y) >> 5;
99 
100  const unsigned areaA = 0x20 + xy - x - y;
101  const unsigned areaB = x - xy;
102  const unsigned areaC = y - xy;
103  const unsigned areaD = xy;
104 
105  a = (a & redblueMask) | ((a & greenMask) << 16);
106  b = (b & redblueMask) | ((b & greenMask) << 16);
107  c = (c & redblueMask) | ((c & greenMask) << 16);
108  d = (d & redblueMask) | ((d & greenMask) << 16);
109  unsigned result = (
110  (areaA * a) + (areaB * b) + (areaC * c) + (areaD * d)
111  ) >> 5;
112  return (result & redblueMask) | ((result >> 16) & greenMask);
113 }
114 
115 template <>
117  unsigned a, unsigned b, unsigned c, unsigned d, unsigned x, unsigned y
118 ) {
119  x >>= 8;
120  y >>= 8;
121  const unsigned xy = (x * y) >> 8;
122 
123  const unsigned areaA = (1 << 8) + xy - x - y;
124  const unsigned areaB = x - xy;
125  const unsigned areaC = y - xy;
126  const unsigned areaD = xy;
127 
128  const unsigned result0 =
129  ((a & 0x00FF00FF) * areaA + (b & 0x00FF00FF) * areaB +
130  (c & 0x00FF00FF) * areaC + (d & 0x00FF00FF) * areaD) >> 8;
131  const unsigned result1 =
132  ((a & 0xFF00FF00) >> 8) * areaA + ((b & 0xFF00FF00) >> 8) * areaB +
133  ((c & 0xFF00FF00) >> 8) * areaC + ((d & 0xFF00FF00) >> 8) * areaD;
134  return (result0 & 0x00FF00FF) | (result1 & 0xFF00FF00);
135 }
136 
/**
 * Pixel blending with weights that are compile-time constants.
 * The weights are template arguments expressed as 16-bit fractions.
 */
template <typename Pixel>
class Blender
{
public:
    /** Blend two pixels; `x` is the weight of `b`. */
    template <unsigned x>
    inline static Pixel blend(unsigned a, unsigned b);

    /** Blend four pixels; `x` and `y` are the horizontal/vertical weights. */
    template <unsigned x, unsigned y>
    inline static Pixel blend(unsigned a, unsigned b, unsigned c, unsigned d);
};
147 
/**
 * Compile-time reduction of a fixed-point value from OLD to NEW fraction
 * bits, rounding to nearest: the highest discarded bit is added back.
 * Note that the result can equal 1 << NEW (e.g. X = 0xFFFF, 16 -> 8 bits).
 *
 * Requires OLD > NEW.
 */
template <unsigned X, unsigned OLD, unsigned NEW>
struct Round {
    static const unsigned result =
        (X >> (OLD - NEW)) + ((X >> (OLD - NEW - 1)) & 1);
};
154 
155 template <typename Pixel>
156 template <unsigned x>
157 inline Pixel Blender<Pixel>::blend(unsigned a, unsigned b)
158 {
159  if (a == b) return a;
160 
161  const unsigned bits = (sizeof(Pixel) == 2) ? 5 : 8;
162  const unsigned areaB = Round<x, 16, bits>::result;
163  const unsigned areaA = (1 << bits) - areaB;
164 
165  if (sizeof(Pixel) == 2) {
166  a = (a & redblueMask) | ((a & greenMask) << 16);
167  b = (b & redblueMask) | ((b & greenMask) << 16);
168  const unsigned result = ((areaA * a) + (areaB * b)) >> bits;
169  return (result & redblueMask) | ((result >> 16) & greenMask);
170  } else {
171  const unsigned result0 =
172  ((a & 0x00FF00FF) * areaA +
173  (b & 0x00FF00FF) * areaB) >> bits;
174  const unsigned result1 =
175  ((a & 0xFF00FF00) >> bits) * areaA +
176  ((b & 0xFF00FF00) >> bits) * areaB;
177  return (result0 & 0x00FF00FF) | (result1 & 0xFF00FF00);
178  }
179 }
180 
181 template <typename Pixel>
182 template <unsigned wx, unsigned wy>
184  unsigned a, unsigned b, unsigned c, unsigned d)
185 {
186  const unsigned bits = (sizeof(Pixel) == 2) ? 5 : 8;
187  const unsigned xy = (wx * wy) >> 16;
188  const unsigned areaB = Round<wx - xy, 16, bits>::result;
189  const unsigned areaC = Round<wy - xy, 16, bits>::result;
190  const unsigned areaD = Round<xy, 16, bits>::result;
191  const unsigned areaA = (1 << bits) - areaB - areaC - areaD;
192 
193  if (sizeof(Pixel) == 2) {
194  a = (a & redblueMask) | ((a & greenMask) << 16);
195  b = (b & redblueMask) | ((b & greenMask) << 16);
196  c = (c & redblueMask) | ((c & greenMask) << 16);
197  d = (d & redblueMask) | ((d & greenMask) << 16);
198  unsigned result = (
199  (areaA * a) + (areaB * b) + (areaC * c) + (areaD * d)
200  ) >> bits;
201  return (result & redblueMask) | ((result >> 16) & greenMask);
202  } else {
203  const unsigned result0 =
204  ((a & 0x00FF00FF) * areaA +
205  (b & 0x00FF00FF) * areaB +
206  (c & 0x00FF00FF) * areaC +
207  (d & 0x00FF00FF) * areaD) >> bits;
208  const unsigned result1 =
209  ((a & 0xFF00FF00) >> bits) * areaA +
210  ((b & 0xFF00FF00) >> bits) * areaB +
211  ((c & 0xFF00FF00) >> bits) * areaC +
212  ((d & 0xFF00FF00) >> bits) * areaD;
213  return (result0 & 0x00FF00FF) | (result1 & 0xFF00FF00);
214  }
215 }
216 
217 template <unsigned i>
219 {
220 public:
221  template <typename Pixel>
222  inline static void fill(Pixel*& dp, unsigned sa) {
223  *dp++ = sa;
224  PixelStripRepeater<i - 1>::template fill<Pixel>(dp, sa);
225  }
226 
227  template <unsigned NX, unsigned y, typename Pixel>
228  inline static void blendBackslash(
229  Pixel*& dp,
230  unsigned sa, unsigned sb, unsigned sc, unsigned sd,
231  unsigned se, unsigned sg, unsigned sj, unsigned sl)
232  {
233  // Fractional parts of the fixed point X coordinates.
234  const unsigned x1 = ((NX - i) << 16) / NX;
235  const unsigned y1 = y;
236  const unsigned f1 = (x1 >> 1) + (0x10000 >> 2);
237  const unsigned f2 = (y1 >> 1) + (0x10000 >> 2);
238  if (y1 <= f1 && sa == sj && sa != se) {
239  *dp++ = Blender<Pixel>::template blend<f1 - y1>(sa, sb);
240  } else if (y1 >= f1 && sa == sg && sa != sl) {
241  *dp++ = Blender<Pixel>::template blend<y1 - f1>(sa, sc);
242  } else if (x1 >= f2 && sa == se && sa != sj) {
243  *dp++ = Blender<Pixel>::template blend<x1 - f2>(sa, sb);
244  } else if (x1 <= f2 && sa == sl && sa != sg) {
245  *dp++ = Blender<Pixel>::template blend<f2 - x1>(sa, sc);
246  } else if (y1 >= x1) {
247  *dp++ = Blender<Pixel>::template blend<y1 - x1>(sa, sc);
248  } else if (y1 <= x1) {
249  *dp++ = Blender<Pixel>::template blend<x1 - y1>(sa, sb);
250  }
251  PixelStripRepeater<i - 1>::template blendBackslash<NX, y, Pixel>(
252  dp, sa, sb, sc, sd, se, sg, sj, sl );
253  }
254 
255  template <unsigned NX, unsigned y, typename Pixel>
256  inline static void blendSlash(
257  Pixel*& dp,
258  unsigned sa, unsigned sb, unsigned sc, unsigned sd,
259  unsigned sf, unsigned sh, unsigned si, unsigned sk)
260  {
261  // Fractional parts of the fixed point X coordinates.
262  const unsigned x1 = ((NX - i) << 16) / NX;
263  const unsigned x2 = 0x10000 - x1;
264  const unsigned y1 = y;
265  const unsigned y2 = 0x10000 - y1;
266  const unsigned f1 = (x1 >> 1) + (0x10000 >> 2);
267  const unsigned f2 = (y1 >> 1) + (0x10000 >> 2);
268  if (y2 >= f1 && sb == sh && sb != sf) {
269  *dp++ = Blender<Pixel>::template blend<y2 - f1>(sb, sa);
270  } else if (y2 <= f1 && sb == si && sb != sk) {
271  *dp++ = Blender<Pixel>::template blend<f1 - y2>(sb, sd);
272  } else if (x2 >= f2 && sb == sf && sb != sh) {
273  *dp++ = Blender<Pixel>::template blend<x2 - f2>(sb, sa);
274  } else if (x2 <= f2 && sb == sk && sb != si) {
275  *dp++ = Blender<Pixel>::template blend<f2 - x2>(sb, sd);
276  } else if (y2 >= x1) {
277  *dp++ = Blender<Pixel>::template blend<y2 - x1>(sb, sa);
278  } else if (y2 <= x1) {
279  *dp++ = Blender<Pixel>::template blend<x1 - y2>(sb, sd);
280  }
281  PixelStripRepeater<i - 1>::template blendSlash<NX, y, Pixel>(
282  dp, sa, sb, sc, sd, sf, sh, si, sk );
283  }
284 
285  template <unsigned NX, unsigned y, typename Pixel>
286  inline static void blend4(
287  Pixel*& dp, unsigned sa, unsigned sb, unsigned sc, unsigned sd)
288  {
289  const unsigned x = ((NX - i) << 16) / NX;
290  *dp++ = Blender<Pixel>::template blend<x, y>(sa, sb, sc, sd);
291  PixelStripRepeater<i - 1>::template blend4<NX, y, Pixel>(dp, sa, sb, sc, sd);
292  }
293 };
294 template <>
296 {
297 public:
298  template <typename Pixel>
299  inline static void fill(Pixel*& /*dp*/, unsigned /*sa*/) { }
300 
301  template <unsigned NX, unsigned y, typename Pixel>
302  inline static void blendBackslash(
303  Pixel*& /*dp*/, unsigned /*sa*/, unsigned /*sb*/,
304  unsigned /*sc*/, unsigned /*sd*/, unsigned /*se*/,
305  unsigned /*sg*/, unsigned /*sj*/, unsigned /*sl*/) { }
306 
307  template <unsigned NX, unsigned y, typename Pixel>
308  inline static void blendSlash(
309  Pixel*& /*dp*/, unsigned /*sa*/, unsigned /*sb*/,
310  unsigned /*sc*/, unsigned /*sd*/, unsigned /*sf*/,
311  unsigned /*sh*/, unsigned /*si*/, unsigned /*sk*/) { }
312 
313  template <unsigned NX, unsigned y, typename Pixel>
314  inline static void blend4(Pixel*& /*dp*/, unsigned /*sa*/,
315  unsigned /*sb*/, unsigned /*sc*/, unsigned /*sd*/) { }
316 };
317 
318 template <unsigned i>
320 {
321 public:
322  template <unsigned NX, unsigned NY, typename Pixel>
323  inline static void scaleFixedLine(
324  const Pixel* __restrict src0, const Pixel* __restrict src1,
325  const Pixel* __restrict src2, const Pixel* __restrict src3,
326  unsigned srcWidth, ScalerOutput<Pixel>& dst, unsigned& dstY)
327  {
328  Pixel* dp = dst.acquireLine(dstY);
329  // Calculate fixed point coordinate.
330  const unsigned y1 = ((NY - i) << 16) / NY;
331 
332  unsigned pos1 = 0;
333  unsigned pos2 = 0;
334  unsigned pos3 = 1;
335  Pixel sb = src1[0];
336  Pixel sd = src2[0];
337  for (unsigned srcX = 0; srcX < srcWidth; srcX++) {
338  const unsigned pos0 = pos1;
339  pos1 = pos2;
340  pos2 = pos3;
341  pos3 = std::min(pos1 + 3, srcWidth) - 1;
342  // Get source pixels.
343  const Pixel sa = sb; // current pixel
344  sb = src1[pos2]; // next pixel
345  const Pixel sc = sd;
346  sd = src2[pos2];
347 
348  // Compute and write color of destination pixel.
349  if (sa == sb && sc == sd && sa == sc) {
350  // All the same color; fill.
351  PixelStripRepeater<NX>::template fill<Pixel>(dp, sa);
352  } else if (sa == sd && sb != sc) {
353  // Pattern in the form of a backslash.
354  PixelStripRepeater<NX>::template blendBackslash<NX, y1, Pixel>(
355  dp, sa, sb, sc, sd, src0[pos1], src1[pos0], src2[pos3], src3[pos2]
356  );
357  } else if (sb == sc && sa != sd) {
358  // Pattern in the form of a slash.
359  PixelStripRepeater<NX>::template blendSlash<NX, y1, Pixel>(
360  dp, sa, sb, sc, sd, src0[pos2], src2[pos0], src1[pos3], src3[pos1]
361  );
362  } else {
363  // No pattern; use bilinear interpolatation.
364  PixelStripRepeater<NX>::template blend4<NX, y1, Pixel>(
365  dp, sa, sb, sc, sd
366  );
367  }
368  }
369  dst.releaseLine(dstY, dp);
370  ++dstY;
371 
372  LineRepeater<i - 1>::template scaleFixedLine<NX, NY, Pixel>(
373  src0, src1, src2, src3, srcWidth, dst, dstY);
374  }
375 };
376 template <>
377 class LineRepeater<0>
378 {
379 public:
380  template <unsigned NX, unsigned NY, typename Pixel>
381  inline static void scaleFixedLine(
382  const Pixel* /*src0*/, const Pixel* /*src1*/, const Pixel* /*src2*/,
383  const Pixel* /*src3*/, unsigned /*srcWidth*/,
384  ScalerOutput<Pixel>& /*dst*/, unsigned& /*dstY*/)
385  { }
386 };
387 
388 template <typename Pixel>
389 template <unsigned NX, unsigned NY>
390 void SaI3xScaler<Pixel>::scaleFixed(FrameSource& src,
391  unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
392  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
393 {
394  assert(dst.getWidth() == srcWidth * NX);
395  assert(dst.getHeight() == src.getHeight() * NY);
396 
397  int srcY = srcStartY;
398  const Pixel* src0 = src.getLinePtr<Pixel>(srcY - 1, srcWidth);
399  const Pixel* src1 = src.getLinePtr<Pixel>(srcY + 0, srcWidth);
400  const Pixel* src2 = src.getLinePtr<Pixel>(srcY + 1, srcWidth);
401  for (unsigned dstY = dstStartY; dstY < dstEndY; srcY++) {
402  const Pixel* src3 = src.getLinePtr<Pixel>(srcY + 2, srcWidth);
403  LineRepeater<NY>::template scaleFixedLine<NX, NY, Pixel>(
404  src0, src1, src2, src3, srcWidth, dst, dstY);
405  src0 = src1;
406  src1 = src2;
407  src2 = src3;
408  }
409 }
410 
411 template <typename Pixel>
412 void SaI3xScaler<Pixel>::scaleAny(FrameSource& src,
413  unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
414  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY) __restrict
415 {
416  // Calculate fixed point end coordinates and deltas.
417  const unsigned wfinish = (srcWidth - 1) << 16;
418  const unsigned dw = wfinish / (dst.getWidth() - 1);
419  const unsigned hfinish = (src.getHeight() - 1) << 16;
420  const unsigned dh = hfinish / (dst.getHeight() - 1);
421 
422  unsigned h = 0;
423  for (unsigned dstY = dstStartY; dstY < dstEndY; dstY++) {
424  // Get source line pointers.
425  int line = srcStartY + (h >> 16);
426  // TODO possible optimization: reuse srcN from previous step
427  const Pixel* __restrict src0 = src.getLinePtr<Pixel>(line - 1, srcWidth);
428  const Pixel* __restrict src1 = src.getLinePtr<Pixel>(line + 0, srcWidth);
429  const Pixel* __restrict src2 = src.getLinePtr<Pixel>(line + 1, srcWidth);
430  const Pixel* __restrict src3 = src.getLinePtr<Pixel>(line + 2, srcWidth);
431 
432  // Get destination line pointer.
433  Pixel* dstLine = dst.acquireLine(dstY);
434  Pixel* __restrict dp = dstLine;
435 
436  // Fractional parts of the fixed point Y coordinates.
437  const unsigned y1 = h & 0xffff;
438  const unsigned y2 = 0x10000 - y1;
439  // Next line.
440  h += dh;
441 
442  unsigned pos1 = 0;
443  unsigned pos2 = 0;
444  unsigned pos3 = 1;
445  Pixel B = src1[0];
446  Pixel D = src2[0];
447  for (unsigned w = 0; w < wfinish; ) {
448  const unsigned pos0 = pos1;
449  pos1 = pos2;
450  pos2 = pos3;
451  pos3 = std::min(pos1 + 3, srcWidth) - 1;
452  // Get source pixels.
453  const Pixel A = B; // current pixel
454  B = src1[pos2]; // next pixel
455  const Pixel C = D;
456  D = src2[pos2];
457 
458  // Compute and write color of destination pixel.
459  if (A == B && C == D && A == C) { // 0
460  do {
461  *dp++ = A;
462  w += dw;
463  } while ((w >> 16) == pos1);
464  } else if (A == D && B != C) { // 1
465  do {
466  // Fractional parts of the fixed point X coordinates.
467  const unsigned x1 = w & 0xffff;
468  const unsigned f1 = (x1 >> 1) + (0x10000 >> 2);
469  const unsigned f2 = (y1 >> 1) + (0x10000 >> 2);
470  Pixel product1;
471  if (y1 <= f1 && A == src2[pos3] && A != src0[pos1]) { // close to B
472  product1 = bilinear<Pixel>(A, B, f1 - y1);
473  } else if (y1 >= f1 && A == src1[pos0] && A != src3[pos2]) { // close to C
474  product1 = bilinear<Pixel>(A, C, y1 - f1);
475  } else if (x1 >= f2 && A == src0[pos1] && A != src2[pos3]) { // close to B
476  product1 = bilinear<Pixel>(A, B, x1 - f2);
477  } else if (x1 <= f2 && A == src3[pos2] && A != src1[pos0]) { // close to C
478  product1 = bilinear<Pixel>(A, C, f2 - x1);
479  } else if (y1 >= x1) { // close to C
480  product1 = bilinear<Pixel>(A, C, y1 - x1);
481  } else {
482  assert(y1 < x1); // close to B
483  product1 = bilinear<Pixel>(A, B, x1 - y1);
484  }
485  *dp++ = product1;
486  w += dw;
487  } while ((w >> 16) == pos1);
488  } else if (B == C && A != D) { // 2
489  do {
490  // Fractional parts of the fixed point X coordinates.
491  const unsigned x1 = w & 0xffff;
492  const unsigned x2 = 0x10000 - x1;
493  const unsigned f1 = (x1 >> 1) + (0x10000 >> 2);
494  const unsigned f2 = (y1 >> 1) + (0x10000 >> 2);
495  Pixel product1;
496  if (y2 >= f1 && B == src2[pos0] && B != src0[pos2]) { // close to A
497  product1 = bilinear<Pixel>(B, A, y2 - f1);
498  } else if (y2 <= f1 && B == src1[pos3] && B != src3[pos1]) { // close to D
499  product1 = bilinear<Pixel>(B, D, f1 - y2);
500  } else if (x2 >= f2 && B == src0[pos2] && B != src2[pos0]) { // close to A
501  product1 = bilinear<Pixel>(B, A, x2 - f2);
502  } else if (x2 <= f2 && B == src3[pos1] && B != src1[pos3]) { // close to D
503  product1 = bilinear<Pixel>(B, D, f2 - x2);
504  } else if (y2 >= x1) { // close to A
505  product1 = bilinear<Pixel>(B, A, y2 - x1);
506  } else {
507  assert(y2 < x1); // close to D
508  product1 = bilinear<Pixel>(B, D, x1 - y2);
509  }
510  *dp++ = product1;
511  w += dw;
512  } while ((w >> 16) == pos1);
513  } else { // 3
514  do {
515  // Fractional parts of the fixed point X coordinates.
516  const unsigned x1 = w & 0xffff;
517  *dp++ = bilinear4<Pixel>(A, B, C, D, x1, y1);
518  w += dw;
519  } while ((w >> 16) == pos1);
520  }
521  }
522  dst.releaseLine(dstY, dstLine);
523  }
524 }
525 
// Scales the given line range by a fixed factor of 3 in both directions by
// forwarding all arguments to scaleFixed<3, 3>().
// NOTE(review): the declarator line (listing line 527: return type, class
// qualifier, function name and the `src` parameter) is missing from this
// extract; the parameter list below is its continuation. Confirm the name
// and first parameter against SaI3xScaler.hh.
526 template <typename Pixel>
528  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
529  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
530 {
531  scaleFixed<3, 3>(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY);
532 }
533 
534 
// Force template instantiation.
// The member templates are defined in this file only, so the class is
// instantiated explicitly for each pixel type enabled in the build
// configuration (HAVE_16BPP / HAVE_32BPP).
#if HAVE_16BPP
template class SaI3xScaler<word>;
#endif
#if HAVE_32BPP
template class SaI3xScaler<unsigned>;
#endif
542 
543 } // namespace openmsx